source: GTP/trunk/Lib/Vis/Preprocessing/src/sparsehash/src/google/sparse_hash_set @ 2162

Revision 2162, 9.2 KB checked in by mattausch, 17 years ago (diff)

improved hash performance with google hashmap

Line 
1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
// This is just a very thin wrapper over sparsehashtable.h, just
// like sgi stl's stl_hash_set is a very thin wrapper over
// stl_hashtable.  Unlike sparse_hash_map, there is no separate
// data_type here and therefore no operator[]: each stored value
// serves as its own key.
38//
39// This is more different from sparse_hash_map than you might think,
40// because all iterators for sets are const (you obviously can't
41// change the key, and for sets there is no value).
42//
43// We adhere mostly to the STL semantics for hash-set.  One important
44// exception is that insert() invalidates iterators entirely.  On the
45// plus side, though, delete() doesn't invalidate iterators at all, or
46// even change the ordering of elements.
47//
48// Here are a few "power user" tips:
49//
50//    1) set_deleted_key():
51//         Unlike STL's hash_map, if you want to use erase() you
52//         must call set_deleted_key() after construction.
53//
54//    2) resize(0):
55//         When an item is deleted, its memory isn't freed right
56//         away.  This allows you to iterate over a hashtable,
57//         and call erase(), without invalidating the iterator.
58//         To force the memory to be freed, call resize(0).
59//
60// Guide to what kind of hash_set to use:
61//   (1) dense_hash_set: fastest, uses the most memory
62//   (2) sparse_hash_set: slowest, uses the least memory
63//   (3) hash_set (STL): in the middle
64// Typically I use sparse_hash_set when I care about space and/or when
65// I need to save the hashtable on disk.  I use hash_set otherwise.  I
66// don't personally use dense_hash_set ever; the only use of
67// dense_hash_set I know of is to work around malloc() bugs in some
68// systems (dense_hash_set has a particularly simple allocation scheme).
69//
70// - dense_hash_set has, typically, a factor of 2 memory overhead (if your
71//   data takes up X bytes, the hash_set uses X more bytes in overhead).
72// - sparse_hash_set has about 2 bits overhead per entry.
// - sparse_hash_set can be 3-7 times slower than the others for lookup and,
//   especially, inserts.  See time_hash_map.cc for details.
75//
76// See /usr/(local/)?doc/sparsehash-0.1/sparse_hash_set.html
77// for information about how to use this class.
78
79#ifndef _SPARSE_HASH_SET_H_
80#define _SPARSE_HASH_SET_H_
81
82#include <google/sparsehash/config.h>
83#include <stdio.h>                    // for FILE * in read()/write()
84#include <algorithm>                  // for the default template args
85#include <functional>                // for equal_to
86#include <memory>                    // for alloc<>
87#include <google/sparsehash/hash_fun.h>
88#include <google/sparsehash/sparsehashtable.h>
89
90_START_GOOGLE_NAMESPACE_
91
92using STL_NAMESPACE::pair;
93
94template <class Value,
95          class HashFcn = HASH_NAMESPACE::hash<Value>,
96          class EqualKey = STL_NAMESPACE::equal_to<Value>,
97          class Alloc = STL_NAMESPACE::allocator<Value> >
98class sparse_hash_set {
99
100 private:
101  // Apparently identity is not stl-standard, so we define our own
102  struct Identity {
103    Value& operator()(Value& v) const { return v; }
104    const Value& operator()(const Value& v) const { return v; }
105  };
106
107  // The actual data
108  typedef sparse_hashtable<Value, Value, HashFcn, Identity, EqualKey, Alloc> ht;
109  ht rep;
110
111 public:
112  typedef typename ht::key_type key_type;
113  typedef typename ht::value_type value_type;
114  typedef typename ht::hasher hasher;
115  typedef typename ht::key_equal key_equal;
116
117  typedef typename ht::size_type size_type;
118  typedef typename ht::difference_type difference_type;
119  typedef typename ht::const_pointer pointer;
120  typedef typename ht::const_pointer const_pointer;
121  typedef typename ht::const_reference reference;
122  typedef typename ht::const_reference const_reference;
123
124  typedef typename ht::const_iterator iterator;
125  typedef typename ht::const_iterator const_iterator;
126
127
128  // Iterator functions -- recall all iterators are const
129  iterator begin() const              { return rep.begin(); }
130  iterator end() const                { return rep.end(); }
131
132
133  // Accessor functions
134  hasher hash_funct() const { return rep.hash_funct(); }
135  key_equal key_eq() const  { return rep.key_eq(); }
136
137
138  // Constructors
139  explicit sparse_hash_set(size_type n = 0,
140                           const hasher& hf = hasher(),
141                           const key_equal& eql = key_equal())
142    : rep(n, hf, eql) { }
143
144  template <class InputIterator>
145  sparse_hash_set(InputIterator f, InputIterator l,
146                  size_type n = 0,
147                  const hasher& hf = hasher(),
148                  const key_equal& eql = key_equal()) {
149    rep.insert(f, l);
150  }
151  // We use the default copy constructor
152  // We use the default operator=()
153  // We use the default destructor
154
155  void clear()                        { rep.clear(); }
156  void swap(sparse_hash_set& hs)      { rep.swap(hs.rep); }
157
158
159  // Functions concerning size
160  size_type size() const              { return rep.size(); }
161  size_type max_size() const          { return rep.max_size(); }
162  bool empty() const                  { return rep.empty(); }
163  size_type bucket_count() const      { return rep.bucket_count(); }
164  size_type max_bucket_count() const  { return rep.max_bucket_count(); }
165
166  void resize(size_type hint)         { rep.resize(hint); }
167
168
169  // Lookup routines
170  iterator find(const key_type& key) const           { return rep.find(key); }
171
172  size_type count(const key_type& key) const         { return rep.count(key); }
173
174  pair<iterator, iterator> equal_range(const key_type& key) const {
175    return rep.equal_range(key);
176  }
177
178  // Insertion routines
179  pair<iterator, bool> insert(const value_type& obj) {
180    pair<typename ht::iterator, bool> p = rep.insert(obj);
181    return pair<iterator, bool>(p.first, p.second);   // const to non-const
182  }
183  template <class InputIterator>
184  void insert(InputIterator f, InputIterator l)      { rep.insert(f, l); }
185  void insert(const_iterator f, const_iterator l)    { rep.insert(f, l); }
186  // required for std::insert_iterator; the passed-in iterator is ignored
187  iterator insert(iterator, const value_type& obj)   { return insert(obj).first; }
188
189
190  // Deletion routines
191  // THESE ARE NON-STANDARD!  I make you specify an "impossible" key
192  // value to identify deleted buckets.  You can change the key as
193  // time goes on, or get rid of it entirely to be insert-only.
194  void set_deleted_key(const key_type& key)   { rep.set_deleted_key(key); }
195  void clear_deleted_key()                    { rep.clear_deleted_key(); }
196
197  // These are standard
198  size_type erase(const key_type& key)               { return rep.erase(key); }
199  void erase(iterator it)                            { rep.erase(it); }
200  void erase(iterator f, iterator l)                 { rep.erase(f, l); }
201
202
203  // Comparison
204  bool operator==(const sparse_hash_set& hs) const   { return rep == hs.rep; }
205  bool operator!=(const sparse_hash_set& hs) const   { return rep != hs.rep; }
206
207
208  // I/O -- this is an add-on for writing metainformation to disk
209  bool write_metadata(FILE *fp)       { return rep.write_metadata(fp); }
210  bool read_metadata(FILE *fp)        { return rep.read_metadata(fp); }
211  bool write_nopointer_data(FILE *fp) { return rep.write_nopointer_data(fp); }
212  bool read_nopointer_data(FILE *fp)  { return rep.read_nopointer_data(fp); }
213};
214
215template <class Val, class HashFcn, class EqualKey, class Alloc>
216inline void swap(sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs1,
217                 sparse_hash_set<Val, HashFcn, EqualKey, Alloc>& hs2) {
218  hs1.swap(hs2);
219}
220
221_END_GOOGLE_NAMESPACE_
222
223#endif /* _SPARSE_HASH_SET_H_ */
Note: See TracBrowser for help on using the repository browser.