1 | // Copyright (c) 2005, Google Inc.
|
---|
2 | // All rights reserved.
|
---|
3 | //
|
---|
4 | // Redistribution and use in source and binary forms, with or without
|
---|
5 | // modification, are permitted provided that the following conditions are
|
---|
6 | // met:
|
---|
7 | //
|
---|
8 | // * Redistributions of source code must retain the above copyright
|
---|
9 | // notice, this list of conditions and the following disclaimer.
|
---|
10 | // * Redistributions in binary form must reproduce the above
|
---|
11 | // copyright notice, this list of conditions and the following disclaimer
|
---|
12 | // in the documentation and/or other materials provided with the
|
---|
13 | // distribution.
|
---|
14 | // * Neither the name of Google Inc. nor the names of its
|
---|
15 | // contributors may be used to endorse or promote products derived from
|
---|
16 | // this software without specific prior written permission.
|
---|
17 | //
|
---|
18 | // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
---|
19 | // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
---|
20 | // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
---|
21 | // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
---|
22 | // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
---|
23 | // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
---|
24 | // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
---|
25 | // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
---|
26 | // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
---|
27 | // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
---|
28 | // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
---|
29 |
|
---|
30 | // ---
|
---|
31 | // Author: Craig Silverstein
|
---|
32 | //
|
---|
33 | // A sparsetable is a random container that implements a sparse array,
|
---|
34 | // that is, an array that uses very little memory to store unassigned
|
---|
35 | // indices (in this case, between 1-2 bits per unassigned index). For
|
---|
36 | // instance, if you allocate an array of size 5 and assign a[2] = <big
|
---|
37 | // struct>, then a[2] will take up a lot of memory but a[0], a[1],
|
---|
38 | // a[3], and a[4] will not. Array elements that have a value are
|
---|
39 | // called "assigned". Array elements that have no value yet, or have
|
---|
40 | // had their value cleared using erase() or clear(), are called
|
---|
41 | // "unassigned".
|
---|
42 | //
|
---|
43 | // Unassigned values seem to have the default value of T (see below).
|
---|
44 | // Nevertheless, there is a difference between an unassigned index and
|
---|
45 | // one explicitly assigned the value of T(). The latter is considered
|
---|
46 | // assigned.
|
---|
47 | //
|
---|
48 | // Access to an array element is constant time, as is insertion and
|
---|
49 | // deletion. Insertion and deletion may be fairly slow, however:
|
---|
50 | // because of this container's memory economy, each insert and delete
|
---|
51 | // causes a memory reallocation.
|
---|
52 | //
|
---|
53 | // See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html
|
---|
54 | // for information about how to use this class.
|
---|
55 |
|
---|
56 | #ifndef _SPARSETABLE_H_
|
---|
57 | #define _SPARSETABLE_H_
|
---|
58 |
|
---|
59 | #include <google/sparsehash/config.h>
|
---|
60 | #include <stdlib.h> // for malloc/free
|
---|
61 | #include <stdio.h> // to read/write tables
|
---|
62 | #if defined HAVE_STDINT_H
|
---|
63 | #include <stdint.h> // to get uint16_t (ISO naming madness)
|
---|
64 | #elif defined HAVE_INTTYPES_H
|
---|
65 | #include <inttypes.h> // another place uint16_t might be defined
|
---|
66 | #else
|
---|
67 | #include <sys/types.h> // our last best hope
|
---|
68 | #endif
|
---|
69 | #include <assert.h> // for bounds checking
|
---|
70 | #include <iterator> // to define reverse_iterator<> for me
|
---|
71 | #include <vector> // a sparsetable is a vector of groups
|
---|
72 |
|
---|
73 | #if STDC_HEADERS
|
---|
74 | #include <string.h> // for memcpy and mmemmove
|
---|
75 | #else
|
---|
76 | #if !HAVE_MEMCPY
|
---|
77 | #define memcpy(d, s, n) bcopy ((s), (d), (n))
|
---|
78 | #define memmove(d, s, n) bcopy ((s), (d), (n))
|
---|
79 | #endif
|
---|
80 | #endif
|
---|
81 |
|
---|
82 | #if !defined HAVE_U_INT16_T && defined HAVE___UINT16
|
---|
83 | typedef __uint16 u_int16_t // true on vc++7
|
---|
84 | #endif
|
---|
85 |
|
---|
86 | _START_GOOGLE_NAMESPACE_
|
---|
87 |
|
---|
88 | using STL_NAMESPACE::vector;
|
---|
89 |
|
---|
90 | // The smaller this is, the faster lookup is (because the group bitmap is
|
---|
91 | // smaller) and the faster insert is, because there's less to memmove.
|
---|
92 | // On the other hand, there are more groups. Since group::size_type is
|
---|
93 | // a short, this number should be of the form 32*x + 16 to avoid waste.
|
---|
94 | static const uint16_t DEFAULT_SPARSEGROUP_SIZE = 48; // fits in 1.5 words
|
---|
95 |
|
---|
96 |
|
---|
97 | // A NOTE ON ASSIGNING:
|
---|
98 | // A sparse table does not actually allocate memory for entries
|
---|
99 | // that are not filled. Because of this, it becomes complicated
|
---|
100 | // to have a non-const iterator: we don't know, if the iterator points
|
---|
101 | // to a not-filled bucket, whether you plan to fill it with something
|
---|
102 | // or whether you plan to read its value (in which case you'll get
|
---|
103 | // the default bucket value). Therefore, while we can define const
|
---|
104 | // operations in a pretty 'normal' way, for non-const operations, we
|
---|
105 | // define something that returns a helper object with operator= and
|
---|
106 | // operator& that allocate a bucket lazily. We use this for table[]
|
---|
107 | // and also for regular table iterators.
|
---|
108 |
|
---|
109 | template <class tabletype>
|
---|
110 | class table_element_adaptor {
|
---|
111 | public:
|
---|
112 | typedef typename tabletype::value_type value_type;
|
---|
113 | typedef typename tabletype::size_type size_type;
|
---|
114 | typedef typename tabletype::reference reference;
|
---|
115 | typedef typename tabletype::pointer pointer;
|
---|
116 |
|
---|
117 | table_element_adaptor(tabletype *tbl, size_type p)
|
---|
118 | : table(tbl), pos(p) { }
|
---|
119 | table_element_adaptor& operator= (value_type val) {
|
---|
120 | table->set(pos, val);
|
---|
121 | return *this;
|
---|
122 | }
|
---|
123 | operator value_type() { return table->get(pos); } // we look like a value
|
---|
124 | pointer operator& () { return &table->mutating_get(pos); }
|
---|
125 |
|
---|
126 | private:
|
---|
127 | tabletype* table;
|
---|
128 | size_type pos;
|
---|
129 | };
|
---|
130 |
|
---|
131 | // Our iterator as simple as iterators can be: basically it's just
|
---|
132 | // the index into our table. Dereference, the only complicated
|
---|
133 | // thing, we punt to the table class. This just goes to show how
|
---|
134 | // much machinery STL requires to do even the most trivial tasks.
|
---|
135 | //
|
---|
136 | // By templatizing over tabletype, we have one iterator type which
|
---|
137 | // we can use for both sparsetables and sparsebins. In fact it
|
---|
138 | // works on any class that allows size() and operator[] (eg vector),
|
---|
139 | // as long as it does the standard STL typedefs too (eg value_type).
|
---|
140 |
|
---|
141 | template <class tabletype>
|
---|
142 | class table_iterator {
|
---|
143 | public:
|
---|
144 | typedef table_iterator iterator;
|
---|
145 |
|
---|
146 | #ifdef UNDERSTANDS_ITERATOR_TAGS
|
---|
147 | typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
|
---|
148 | #endif
|
---|
149 | typedef typename tabletype::value_type value_type;
|
---|
150 | typedef typename tabletype::difference_type difference_type;
|
---|
151 | typedef typename tabletype::size_type size_type;
|
---|
152 | typedef table_element_adaptor<tabletype> reference;
|
---|
153 | typedef table_element_adaptor<tabletype>* pointer;
|
---|
154 |
|
---|
155 | // The "real" constructor
|
---|
156 | table_iterator(tabletype *tbl, size_type p)
|
---|
157 | : table(tbl), pos(p) { }
|
---|
158 | // The default constructor, used when I define vars of type table::iterator
|
---|
159 | table_iterator() : table(NULL), pos(0) { }
|
---|
160 | // The copy constructor, for when I say table::iterator foo = tbl.begin()
|
---|
161 | // The default destructor is fine; we don't define one
|
---|
162 | // The default operator= is fine; we don't define one
|
---|
163 |
|
---|
164 | // The main thing our iterator does is dereference. If the table entry
|
---|
165 | // we point to is empty, we return the default value type.
|
---|
166 | // This is the big different function from the const iterator.
|
---|
167 | reference operator*() {
|
---|
168 | return table_element_adaptor<tabletype>(table, pos);
|
---|
169 | }
|
---|
170 | pointer operator->() { return &(operator*()); }
|
---|
171 |
|
---|
172 | // Helper function to assert things are ok; eg pos is still in range
|
---|
173 | void check() const {
|
---|
174 | assert(table);
|
---|
175 | assert(pos <= table->size());
|
---|
176 | }
|
---|
177 |
|
---|
178 | // Arithmetic: we just do arithmetic on pos. We don't even need to
|
---|
179 | // do bounds checking, since STL doesn't consider that it's job. :-)
|
---|
180 | iterator& operator+=(size_type t) { pos += t; check(); return *this; }
|
---|
181 | iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
|
---|
182 | iterator& operator++() { ++pos; check(); return *this; }
|
---|
183 | iterator& operator--() { --pos; check(); return *this; }
|
---|
184 | iterator operator++(int) { iterator tmp(*this); // for x++
|
---|
185 | ++pos; check(); return tmp; }
|
---|
186 | iterator operator--(int) { iterator tmp(*this); // for x--
|
---|
187 | --pos; check(); return tmp; }
|
---|
188 | iterator operator+(difference_type i) const { iterator tmp(*this);
|
---|
189 | tmp += i; return tmp; }
|
---|
190 | iterator operator-(difference_type i) const { iterator tmp(*this);
|
---|
191 | tmp -= i; return tmp; }
|
---|
192 | difference_type operator-(iterator it) const { // for "x = it2 - it"
|
---|
193 | assert(table == it.table);
|
---|
194 | return pos - it.pos;
|
---|
195 | }
|
---|
196 | reference operator[](difference_type n) const {
|
---|
197 | return *(*this + n); // simple though not totally efficient
|
---|
198 | }
|
---|
199 |
|
---|
200 | // Comparisons.
|
---|
201 | bool operator==(const iterator& it) const {
|
---|
202 | return table == it.table && pos == it.pos;
|
---|
203 | }
|
---|
204 | bool operator<(const iterator& it) const {
|
---|
205 | assert(table == it.table); // life is bad bad bad otherwise
|
---|
206 | return pos < it.pos;
|
---|
207 | }
|
---|
208 | bool operator<=(const iterator& it) const {
|
---|
209 | assert(table == it.table);
|
---|
210 | return pos <= it.pos;
|
---|
211 | }
|
---|
212 | bool operator!=(const iterator& it) const { return !(*this == it); }
|
---|
213 | bool operator>(const iterator& it) const { return it <= *this; }
|
---|
214 | bool operator>=(const iterator& it) const { return it < *this; }
|
---|
215 |
|
---|
216 | // Here's the info we actually need to be an iterator
|
---|
217 | tabletype *table; // so we can dereference and bounds-check
|
---|
218 | size_type pos; // index into the table
|
---|
219 | };
|
---|
220 |
|
---|
221 | // support for "3 + iterator" has to be defined outside the class, alas
|
---|
222 | template<class T>
|
---|
223 | table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
|
---|
224 | table_iterator<T> it) {
|
---|
225 | return it + i; // so people can say it2 = 3 + it
|
---|
226 | }
|
---|
227 |
|
---|
228 | template <class tabletype>
|
---|
229 | class const_table_iterator {
|
---|
230 | public:
|
---|
231 | typedef table_iterator<tabletype> iterator;
|
---|
232 | typedef const_table_iterator const_iterator;
|
---|
233 |
|
---|
234 | #ifdef UNDERSTANDS_ITERATOR_TAGS
|
---|
235 | typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
|
---|
236 | #endif
|
---|
237 | typedef typename tabletype::value_type value_type;
|
---|
238 | typedef typename tabletype::difference_type difference_type;
|
---|
239 | typedef typename tabletype::size_type size_type;
|
---|
240 | typedef typename tabletype::const_reference reference; // we're const-only
|
---|
241 | typedef typename tabletype::const_pointer pointer;
|
---|
242 |
|
---|
243 | // The "real" constructor
|
---|
244 | const_table_iterator(const tabletype *tbl, size_type p)
|
---|
245 | : table(tbl), pos(p) { }
|
---|
246 | // The default constructor, used when I define vars of type table::iterator
|
---|
247 | const_table_iterator() : table(NULL), pos(0) { }
|
---|
248 | // The copy constructor, for when I say table::iterator foo = tbl.begin()
|
---|
249 | // Also converts normal iterators to const iterators
|
---|
250 | const_table_iterator(const iterator &from)
|
---|
251 | : table(from.table), pos(from.pos) { }
|
---|
252 | // The default destructor is fine; we don't define one
|
---|
253 | // The default operator= is fine; we don't define one
|
---|
254 |
|
---|
255 | // The main thing our iterator does is dereference. If the table entry
|
---|
256 | // we point to is empty, we return the default value type.
|
---|
257 | reference operator*() const { return (*table)[pos]; }
|
---|
258 | pointer operator->() const { return &(operator*()); }
|
---|
259 |
|
---|
260 | // Helper function to assert things are ok; eg pos is still in range
|
---|
261 | void check() const {
|
---|
262 | assert(table);
|
---|
263 | assert(pos <= table->size());
|
---|
264 | }
|
---|
265 |
|
---|
266 | // Arithmetic: we just do arithmetic on pos. We don't even need to
|
---|
267 | // do bounds checking, since STL doesn't consider that it's job. :-)
|
---|
268 | const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
|
---|
269 | const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
|
---|
270 | const_iterator& operator++() { ++pos; check(); return *this; }
|
---|
271 | const_iterator& operator--() { --pos; check(); return *this; }
|
---|
272 | const_iterator operator++(int) { const_iterator tmp(*this); // for x++
|
---|
273 | ++pos; check(); return tmp; }
|
---|
274 | const_iterator operator--(int) { const_iterator tmp(*this); // for x--
|
---|
275 | --pos; check(); return tmp; }
|
---|
276 | const_iterator operator+(difference_type i) const { const_iterator tmp(*this);
|
---|
277 | tmp += i; return tmp; }
|
---|
278 | const_iterator operator-(difference_type i) const { const_iterator tmp(*this);
|
---|
279 | tmp -= i; return tmp; }
|
---|
280 | difference_type operator-(const_iterator it) const { // for "x = it2 - it"
|
---|
281 | assert(table == it.table);
|
---|
282 | return pos - it.pos;
|
---|
283 | }
|
---|
284 | reference operator[](difference_type n) const {
|
---|
285 | return *(*this + n); // simple though not totally efficient
|
---|
286 | }
|
---|
287 |
|
---|
288 | // Comparisons.
|
---|
289 | bool operator==(const const_iterator& it) const {
|
---|
290 | return table == it.table && pos == it.pos;
|
---|
291 | }
|
---|
292 | bool operator<(const const_iterator& it) const {
|
---|
293 | assert(table == it.table); // life is bad bad bad otherwise
|
---|
294 | return pos < it.pos;
|
---|
295 | }
|
---|
296 | bool operator<=(const const_iterator& it) const {
|
---|
297 | assert(table == it.table);
|
---|
298 | return pos <= it.pos;
|
---|
299 | }
|
---|
300 | bool operator!=(const const_iterator& it) const { return !(*this == it); }
|
---|
301 | bool operator>(const const_iterator& it) const { return it <= *this; }
|
---|
302 | bool operator>=(const const_iterator& it) const { return it < *this; }
|
---|
303 |
|
---|
304 | // Here's the info we actually need to be an iterator
|
---|
305 | const tabletype *table; // so we can dereference and bounds-check
|
---|
306 | size_type pos; // index into the table
|
---|
307 | };
|
---|
308 |
|
---|
309 | // support for "3 + iterator" has to be defined outside the class, alas
|
---|
310 | template<class T>
|
---|
311 | const_table_iterator<T> operator+(typename
|
---|
312 | const_table_iterator<T>::difference_type i,
|
---|
313 | const_table_iterator<T> it) {
|
---|
314 | return it + i; // so people can say it2 = 3 + it
|
---|
315 | }
|
---|
316 |
|
---|
317 |
|
---|
318 | // ---------------------------------------------------------------------------
|
---|
319 |
|
---|
320 |
|
---|
321 | /*
|
---|
322 | // This is a 2-D iterator. You specify a begin and end over a list
|
---|
323 | // of *containers*. We iterate over each container by iterating over
|
---|
324 | // it. It's actually simple:
|
---|
325 | // VECTOR.begin() VECTOR[0].begin() --------> VECTOR[0].end() ---\
|
---|
326 | // | ________________________________________________/
|
---|
327 | // | \_> VECTOR[1].begin() --------> VECTOR[1].end() -\
|
---|
328 | // | ___________________________________________________/
|
---|
329 | // v \_> ......
|
---|
330 | // VECTOR.end()
|
---|
331 | //
|
---|
332 | // It's impossible to do random access on one of these things in constant
|
---|
333 | // time, so it's just a bidirectional iterator.
|
---|
334 | //
|
---|
335 | // Unfortunately, because we need to use this for a non-empty iterator,
|
---|
336 | // we use nonempty_begin() and nonempty_end() instead of begin() and end()
|
---|
337 | // (though only going across, not down).
|
---|
338 | */
|
---|
339 |
|
---|
340 | #define TWOD_BEGIN_ nonempty_begin
|
---|
341 | #define TWOD_END_ nonempty_end
|
---|
342 | #define TWOD_ITER_ nonempty_iterator
|
---|
343 | #define TWOD_CONST_ITER_ const_nonempty_iterator
|
---|
344 |
|
---|
345 | template <class containertype>
|
---|
346 | class two_d_iterator {
|
---|
347 | public:
|
---|
348 | typedef two_d_iterator iterator;
|
---|
349 |
|
---|
350 | #ifdef UNDERSTANDS_ITERATOR_TAGS
|
---|
351 | typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
|
---|
352 | #endif
|
---|
353 | // apparently some versions of VC++ have trouble with two ::'s in a typename
|
---|
354 | typedef typename containertype::value_type _tmp_vt;
|
---|
355 | typedef typename _tmp_vt::value_type value_type;
|
---|
356 | typedef typename _tmp_vt::difference_type difference_type;
|
---|
357 | typedef typename _tmp_vt::reference reference;
|
---|
358 | typedef typename _tmp_vt::pointer pointer;
|
---|
359 |
|
---|
360 | // The "real" constructor. begin and end specify how many rows we have
|
---|
361 | // (in the diagram above); we always iterate over each row completely.
|
---|
362 | two_d_iterator(typename containertype::iterator begin,
|
---|
363 | typename containertype::iterator end,
|
---|
364 | typename containertype::iterator curr)
|
---|
365 | : row_begin(begin), row_end(end), row_current(curr), col_current() {
|
---|
366 | if ( row_current != row_end ) {
|
---|
367 | col_current = row_current->TWOD_BEGIN_();
|
---|
368 | advance_past_end(); // in case cur->begin() == cur->end()
|
---|
369 | }
|
---|
370 | }
|
---|
371 | // If you want to start at an arbitrary place, you can, I guess
|
---|
372 | two_d_iterator(typename containertype::iterator begin,
|
---|
373 | typename containertype::iterator end,
|
---|
374 | typename containertype::iterator curr,
|
---|
375 | typename containertype::value_type::TWOD_ITER_ col)
|
---|
376 | : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
|
---|
377 | advance_past_end(); // in case cur->begin() == cur->end()
|
---|
378 | }
|
---|
379 | // The default constructor, used when I define vars of type table::iterator
|
---|
380 | two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
|
---|
381 | // The default destructor is fine; we don't define one
|
---|
382 | // The default operator= is fine; we don't define one
|
---|
383 |
|
---|
384 | // Happy dereferencer
|
---|
385 | reference operator*() const { return *col_current; }
|
---|
386 | pointer operator->() const { return &(operator*()); }
|
---|
387 |
|
---|
388 | // Arithmetic: we just do arithmetic on pos. We don't even need to
|
---|
389 | // do bounds checking, since STL doesn't consider that it's job. :-)
|
---|
390 | // NOTE: this is not amortized constant time! What do we do about it?
|
---|
391 | void advance_past_end() { // used when col_current points to end()
|
---|
392 | while ( col_current == row_current->TWOD_END_() ) { // end of current row
|
---|
393 | ++row_current; // go to beginning of next
|
---|
394 | if ( row_current != row_end ) // col is irrelevant at end
|
---|
395 | col_current = row_current->TWOD_BEGIN_();
|
---|
396 | else
|
---|
397 | break; // don't go past row_end
|
---|
398 | }
|
---|
399 | }
|
---|
400 |
|
---|
401 | iterator& operator++() {
|
---|
402 | assert(row_current != row_end); // how to ++ from there?
|
---|
403 | ++col_current;
|
---|
404 | advance_past_end(); // in case col_current is at end()
|
---|
405 | return *this;
|
---|
406 | }
|
---|
407 | iterator& operator--() {
|
---|
408 | while ( row_current == row_end ||
|
---|
409 | col_current == row_current->TWOD_BEGIN_() ) {
|
---|
410 | assert(row_current != row_begin);
|
---|
411 | --row_current;
|
---|
412 | col_current = row_current->TWOD_END_(); // this is 1 too far
|
---|
413 | }
|
---|
414 | --col_current;
|
---|
415 | return *this;
|
---|
416 | }
|
---|
417 | iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
|
---|
418 | iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }
|
---|
419 |
|
---|
420 |
|
---|
421 | // Comparisons.
|
---|
422 | bool operator==(const iterator& it) const {
|
---|
423 | return ( row_begin == it.row_begin &&
|
---|
424 | row_end == it.row_end &&
|
---|
425 | row_current == it.row_current &&
|
---|
426 | (row_current == row_end || col_current == it.col_current) );
|
---|
427 | }
|
---|
428 | bool operator!=(const iterator& it) const { return !(*this == it); }
|
---|
429 |
|
---|
430 |
|
---|
431 | // Here's the info we actually need to be an iterator
|
---|
432 | // These need to be public so we convert from iterator to const_iterator
|
---|
433 | typename containertype::iterator row_begin, row_end, row_current;
|
---|
434 | typename containertype::value_type::TWOD_ITER_ col_current;
|
---|
435 | };
|
---|
436 |
|
---|
437 | // The same thing again, but this time const. :-(
|
---|
438 | template <class containertype>
|
---|
439 | class const_two_d_iterator {
|
---|
440 | public:
|
---|
441 | typedef const_two_d_iterator iterator;
|
---|
442 |
|
---|
443 | #ifdef UNDERSTANDS_ITERATOR_TAGS
|
---|
444 | typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
|
---|
445 | #endif
|
---|
446 | // apparently some versions of VC++ have trouble with two ::'s in a typename
|
---|
447 | typedef typename containertype::value_type _tmp_vt;
|
---|
448 | typedef typename _tmp_vt::value_type value_type;
|
---|
449 | typedef typename _tmp_vt::difference_type difference_type;
|
---|
450 | typedef typename _tmp_vt::const_reference reference;
|
---|
451 | typedef typename _tmp_vt::const_pointer pointer;
|
---|
452 |
|
---|
453 | const_two_d_iterator(typename containertype::const_iterator begin,
|
---|
454 | typename containertype::const_iterator end,
|
---|
455 | typename containertype::const_iterator curr)
|
---|
456 | : row_begin(begin), row_end(end), row_current(curr), col_current() {
|
---|
457 | if ( curr != end ) {
|
---|
458 | col_current = curr->TWOD_BEGIN_();
|
---|
459 | advance_past_end(); // in case cur->begin() == cur->end()
|
---|
460 | }
|
---|
461 | }
|
---|
462 | const_two_d_iterator(typename containertype::const_iterator begin,
|
---|
463 | typename containertype::const_iterator end,
|
---|
464 | typename containertype::const_iterator curr,
|
---|
465 | typename containertype::value_type::TWOD_CONST_ITER_ col)
|
---|
466 | : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
|
---|
467 | advance_past_end(); // in case cur->begin() == cur->end()
|
---|
468 | }
|
---|
469 | const_two_d_iterator()
|
---|
470 | : row_begin(), row_end(), row_current(), col_current() {
|
---|
471 | }
|
---|
472 | // Need this explicitly so we can convert normal iterators to const iterators
|
---|
473 | const_two_d_iterator(const two_d_iterator<containertype>& it) :
|
---|
474 | row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
|
---|
475 | col_current(it.col_current) { }
|
---|
476 |
|
---|
477 | typename containertype::const_iterator row_begin, row_end, row_current;
|
---|
478 | typename containertype::value_type::TWOD_CONST_ITER_ col_current;
|
---|
479 |
|
---|
480 |
|
---|
481 | // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
|
---|
482 | reference operator*() const { return *col_current; }
|
---|
483 | pointer operator->() const { return &(operator*()); }
|
---|
484 |
|
---|
485 | void advance_past_end() { // used when col_current points to end()
|
---|
486 | while ( col_current == row_current->TWOD_END_() ) { // end of current row
|
---|
487 | ++row_current; // go to beginning of next
|
---|
488 | if ( row_current != row_end ) // col is irrelevant at end
|
---|
489 | col_current = row_current->TWOD_BEGIN_();
|
---|
490 | else
|
---|
491 | break; // don't go past row_end
|
---|
492 | }
|
---|
493 | }
|
---|
494 | iterator& operator++() {
|
---|
495 | assert(row_current != row_end); // how to ++ from there?
|
---|
496 | ++col_current;
|
---|
497 | advance_past_end(); // in case col_current is at end()
|
---|
498 | return *this;
|
---|
499 | }
|
---|
500 | iterator& operator--() {
|
---|
501 | while ( row_current == row_end ||
|
---|
502 | col_current == row_current->TWOD_BEGIN_() ) {
|
---|
503 | assert(row_current != row_begin);
|
---|
504 | --row_current;
|
---|
505 | col_current = row_current->TWOD_END_(); // this is 1 too far
|
---|
506 | }
|
---|
507 | --col_current;
|
---|
508 | return *this;
|
---|
509 | }
|
---|
510 | iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
|
---|
511 | iterator operator--(int) { iterator tmp(*this); --*this; return tmp; }
|
---|
512 |
|
---|
513 | bool operator==(const iterator& it) const {
|
---|
514 | return ( row_begin == it.row_begin &&
|
---|
515 | row_end == it.row_end &&
|
---|
516 | row_current == it.row_current &&
|
---|
517 | (row_current == row_end || col_current == it.col_current) );
|
---|
518 | }
|
---|
519 | bool operator!=(const iterator& it) const { return !(*this == it); }
|
---|
520 | };
|
---|
521 |
|
---|
522 | // We provide yet another version, to be as frugal with memory as
|
---|
523 | // possible. This one frees each block of memory as it finishes
|
---|
524 | // iterating over it. By the end, the entire table is freed.
|
---|
525 | // For understandable reasons, you can only iterate over it once,
|
---|
526 | // which is why it's an input iterator
|
---|
527 | template <class containertype>
|
---|
528 | class destructive_two_d_iterator {
|
---|
529 | public:
|
---|
530 | typedef destructive_two_d_iterator iterator;
|
---|
531 |
|
---|
532 | #ifdef UNDERSTANDS_ITERATOR_TAGS
|
---|
533 | typedef STL_NAMESPACE::input_iterator_tag iterator_category;
|
---|
534 | #endif
|
---|
535 | // apparently some versions of VC++ have trouble with two ::'s in a typename
|
---|
536 | typedef typename containertype::value_type _tmp_vt;
|
---|
537 | typedef typename _tmp_vt::value_type value_type;
|
---|
538 | typedef typename _tmp_vt::difference_type difference_type;
|
---|
539 | typedef typename _tmp_vt::reference reference;
|
---|
540 | typedef typename _tmp_vt::pointer pointer;
|
---|
541 |
|
---|
542 | destructive_two_d_iterator(typename containertype::iterator begin,
|
---|
543 | typename containertype::iterator end,
|
---|
544 | typename containertype::iterator curr)
|
---|
545 | : row_begin(begin), row_end(end), row_current(curr), col_current() {
|
---|
546 | if ( curr != end ) {
|
---|
547 | col_current = curr->TWOD_BEGIN_();
|
---|
548 | advance_past_end(); // in case cur->begin() == cur->end()
|
---|
549 | }
|
---|
550 | }
|
---|
551 | destructive_two_d_iterator(typename containertype::iterator begin,
|
---|
552 | typename containertype::iterator end,
|
---|
553 | typename containertype::iterator curr,
|
---|
554 | typename containertype::value_type::TWOD_ITER_ col)
|
---|
555 | : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
|
---|
556 | advance_past_end(); // in case cur->begin() == cur->end()
|
---|
557 | }
|
---|
558 | destructive_two_d_iterator()
|
---|
559 | : row_begin(), row_end(), row_current(), col_current() {
|
---|
560 | }
|
---|
561 |
|
---|
562 | typename containertype::iterator row_begin, row_end, row_current;
|
---|
563 | typename containertype::value_type::TWOD_ITER_ col_current;
|
---|
564 |
|
---|
565 | // This is the part that destroys
|
---|
566 | void advance_past_end() { // used when col_current points to end()
|
---|
567 | while ( col_current == row_current->TWOD_END_() ) { // end of current row
|
---|
568 | row_current->clear(); // the destructive part
|
---|
569 | // It would be nice if we could decrement sparsetable->num_buckets here
|
---|
570 | ++row_current; // go to beginning of next
|
---|
571 | if ( row_current != row_end ) // col is irrelevant at end
|
---|
572 | col_current = row_current->TWOD_BEGIN_();
|
---|
573 | else
|
---|
574 | break; // don't go past row_end
|
---|
575 | }
|
---|
576 | }
|
---|
577 |
|
---|
578 | // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
|
---|
579 | reference operator*() const { return *col_current; }
|
---|
580 | pointer operator->() const { return &(operator*()); }
|
---|
581 |
|
---|
582 | iterator& operator++() {
|
---|
583 | assert(row_current != row_end); // how to ++ from there?
|
---|
584 | ++col_current;
|
---|
585 | advance_past_end(); // in case col_current is at end()
|
---|
586 | return *this;
|
---|
587 | }
|
---|
588 | iterator operator++(int) { iterator tmp(*this); ++*this; return tmp; }
|
---|
589 |
|
---|
590 | bool operator==(const iterator& it) const {
|
---|
591 | return ( row_begin == it.row_begin &&
|
---|
592 | row_end == it.row_end &&
|
---|
593 | row_current == it.row_current &&
|
---|
594 | (row_current == row_end || col_current == it.col_current) );
|
---|
595 | }
|
---|
596 | bool operator!=(const iterator& it) const { return !(*this == it); }
|
---|
597 | };
|
---|
598 |
|
---|
599 | #undef TWOD_BEGIN_
|
---|
600 | #undef TWOD_END_
|
---|
601 | #undef TWOD_ITER_
|
---|
602 | #undef TWOD_CONST_ITER_
|
---|
603 |
|
---|
604 |
|
---|
605 |
|
---|
606 |
|
---|
607 | // SPARSE-TABLE
|
---|
608 | // ------------
|
---|
609 | // The idea is that a table with (logically) t buckets is divided
|
---|
610 | // into t/M *groups* of M buckets each. (M is a constant set in
|
---|
611 | // GROUP_SIZE for efficiency.) Each group is stored sparsely.
|
---|
612 | // Thus, inserting into the table causes some array to grow, which is
|
---|
613 | // slow but still constant time. Lookup involves doing a
|
---|
614 | // logical-position-to-sparse-position lookup, which is also slow but
|
---|
615 | // constant time. The larger M is, the slower these operations are
|
---|
616 | // but the less overhead (slightly).
|
---|
617 | //
|
---|
618 | // To store the sparse array, we store a bitmap B, where B[i] = 1 iff
|
---|
619 | // bucket i is non-empty. Then to look up bucket i we really look up
|
---|
620 | // array[# of 1s before i in B]. This is constant time for fixed M.
|
---|
621 | //
|
---|
622 | // Terminology: the position of an item in the overall table (from
|
---|
623 | // 1 .. t) is called its "location." The logical position in a group
|
---|
624 | // (from 1 .. M ) is called its "position." The actual location in
|
---|
625 | // the array (from 1 .. # of non-empty buckets in the group) is
|
---|
626 | // called its "offset."
|
---|
627 |
|
---|
628 | // The weird mod in the offset is entirely to quiet compiler warnings
|
---|
629 | #define PUT_(take_from, offset) \
|
---|
630 | if ( putc(((take_from) >> ((offset) % (sizeof(take_from)*8))) % 256, fp) \
|
---|
631 | == EOF ) \
|
---|
632 | return false
|
---|
633 |
|
---|
634 | #define GET_(add_to, offset) \
|
---|
635 | if ((x=getc(fp)) == EOF) \
|
---|
636 | return false; \
|
---|
637 | else \
|
---|
638 | add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8)))
|
---|
639 |
|
---|
640 |
|
---|
641 | template <class T, uint16_t GROUP_SIZE>
|
---|
642 | class sparsegroup {
|
---|
643 | public:
|
---|
644 | // Basic types
|
---|
645 | typedef T value_type;
|
---|
646 | typedef value_type* pointer;
|
---|
647 | typedef const value_type* const_pointer;
|
---|
648 | typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
|
---|
649 | typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
|
---|
650 | typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
|
---|
651 | typedef value_type &reference;
|
---|
652 | typedef const value_type &const_reference;
|
---|
653 | typedef uint16_t size_type; // max # of buckets
|
---|
654 | typedef int16_t difference_type;
|
---|
655 | typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
|
---|
656 | typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
|
---|
657 |
|
---|
658 | // These are our special iterators, that go over non-empty buckets in a
|
---|
659 | // group. These aren't const-only because you can change non-empty bcks.
|
---|
660 | typedef pointer nonempty_iterator;
|
---|
661 | typedef const_pointer const_nonempty_iterator;
|
---|
662 | typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
|
---|
663 | typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
|
---|
664 |
|
---|
665 | // Iterator functions
|
---|
666 | iterator begin() { return iterator(this, 0); }
|
---|
667 | const_iterator begin() const { return const_iterator(this, 0); }
|
---|
668 | iterator end() { return iterator(this, size()); }
|
---|
669 | const_iterator end() const { return const_iterator(this, size()); }
|
---|
670 | reverse_iterator rbegin() { return reverse_iterator(end()); }
|
---|
671 | const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
|
---|
672 | reverse_iterator rend() { return reverse_iterator(begin()); }
|
---|
673 | const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
|
---|
674 |
|
---|
675 | // We'll have versions for our special non-empty iterator too
|
---|
676 | nonempty_iterator nonempty_begin() { return group; }
|
---|
677 | const_nonempty_iterator nonempty_begin() const { return group; }
|
---|
678 | nonempty_iterator nonempty_end() { return group + num_buckets; }
|
---|
679 | const_nonempty_iterator nonempty_end() const { return group + num_buckets; }
|
---|
680 | reverse_nonempty_iterator nonempty_rbegin() {
|
---|
681 | return reverse_nonempty_iterator(nonempty_end());
|
---|
682 | }
|
---|
683 | const_reverse_nonempty_iterator nonempty_rbegin() const {
|
---|
684 | return const_reverse_nonempty_iterator(nonempty_end());
|
---|
685 | }
|
---|
686 | reverse_nonempty_iterator nonempty_rend() {
|
---|
687 | return reverse_nonempty_iterator(nonempty_begin());
|
---|
688 | }
|
---|
689 | const_reverse_nonempty_iterator nonempty_rend() const {
|
---|
690 | return const_reverse_nonempty_iterator(nonempty_begin());
|
---|
691 | }
|
---|
692 |
|
---|
693 |
|
---|
694 | // This gives us the "default" value to return for an empty bucket.
|
---|
695 | // We just use the default constructor on T, the template type
|
---|
696 | const_reference default_value() const {
|
---|
697 | static value_type defaultval = value_type();
|
---|
698 | return defaultval;
|
---|
699 | }
|
---|
700 |
|
---|
701 |
|
---|
702 | private:
|
---|
703 | // We need to do all this bit manipulation, of course. ick
|
---|
704 | static size_type charbit(size_type i) { return i >> 3; }
|
---|
705 | static size_type modbit(size_type i) { return 1 << (i&7); }
|
---|
706 | bool bmtest(size_type i) const { return bitmap[charbit(i)] & modbit(i); }
|
---|
707 | void bmset(size_type i) { bitmap[charbit(i)] |= modbit(i); }
|
---|
708 | void bmclear(size_type i) { bitmap[charbit(i)] &= ~modbit(i); }
|
---|
709 |
|
---|
710 | // malloc / realloc that dies if allocation fails
|
---|
711 | void* realloc_or_die(void* ptr, size_t num_bytes) {
|
---|
712 | void* retval = realloc(ptr, num_bytes);
|
---|
713 | if (retval == NULL) {
|
---|
714 | fprintf(stderr, "FATAL ERROR: failed to allocate %d bytes for ptr %p",
|
---|
715 | num_bytes, ptr);
|
---|
716 | exit(1);
|
---|
717 | }
|
---|
718 | return retval;
|
---|
719 | }
|
---|
720 |
|
---|
721 | void* malloc_or_die(size_t num_bytes) {
|
---|
722 | return realloc_or_die(NULL, num_bytes);
|
---|
723 | }
|
---|
724 |
|
---|
725 | public: // get_iter() in sparsetable needs it
|
---|
726 | // We need a small function that tells us how many set bits there are
|
---|
727 | // in positions 0..i-1 of the bitmap. It uses a big table.
|
---|
728 | // We make it static so templates don't allocate lots of these tables
|
---|
729 | static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
|
---|
730 | // We could make these ints. The tradeoff is size (eg does it overwhelm
|
---|
731 | // the cache?) vs efficiency in referencing sub-word-sized array elements
|
---|
732 | static const char bits_in[256] = { // # of bits set in one char
|
---|
733 | 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
---|
734 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
735 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
736 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
737 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
738 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
739 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
740 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
741 | 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
---|
742 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
743 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
744 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
745 | 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
---|
746 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
747 | 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
---|
748 | 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
|
---|
749 | };
|
---|
750 | size_type retval = 0;
|
---|
751 |
|
---|
752 | // [Note: condition pos > 8 is an optimization; convince yourself we
|
---|
753 | // give exactly the same result as if we had pos >= 8 here instead.]
|
---|
754 | for ( ; pos > 8; pos -= 8 ) // bm[0..pos/8-1]
|
---|
755 | retval += bits_in[*bm++]; // chars we want *all* bits in
|
---|
756 | return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos
|
---|
757 | }
|
---|
758 |
|
---|
759 | size_type pos_to_offset(size_type pos) const { // not static but still const
|
---|
760 | return pos_to_offset(bitmap, pos);
|
---|
761 | }
|
---|
762 |
|
---|
763 |
|
---|
764 | public:
|
---|
765 | // Constructors -- default and copy -- and destructor
|
---|
766 | sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
|
---|
767 | sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
|
---|
768 | if ( num_buckets ) {
|
---|
769 | group = (value_type *)malloc_or_die(x.num_buckets * sizeof(*group));
|
---|
770 | memcpy(group, x.group, x.num_buckets * sizeof(*group));
|
---|
771 | }
|
---|
772 | memcpy(bitmap, x.bitmap, sizeof(bitmap));
|
---|
773 | }
|
---|
774 | ~sparsegroup() { free(group); } // free(NULL) does the right thing
|
---|
775 |
|
---|
776 | // Operator= is just like the copy constructor, I guess
|
---|
777 | sparsegroup &operator=(const sparsegroup& x) {
|
---|
778 | if ( &x == this ) return *this; // x = x
|
---|
779 | if ( x.num_buckets == 0 ) {
|
---|
780 | free(group);
|
---|
781 | group = NULL;
|
---|
782 | } else {
|
---|
783 | group = (value_type *)
|
---|
784 | realloc_or_die(group, x.num_buckets * sizeof(*group));
|
---|
785 | memcpy(group, x.group, x.num_buckets * sizeof(*group));
|
---|
786 | }
|
---|
787 | memcpy(bitmap, x.bitmap, sizeof(bitmap));
|
---|
788 | num_buckets = x.num_buckets;
|
---|
789 | return *this;
|
---|
790 | }
|
---|
791 |
|
---|
792 | // Many STL algorithms use swap instead of copy constructors
|
---|
793 | void swap(sparsegroup& x) {
|
---|
794 | STL_NAMESPACE::swap(group, x.group);
|
---|
795 | for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
|
---|
796 | STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]); // swap not defined on arrays
|
---|
797 | STL_NAMESPACE::swap(num_buckets, x.num_buckets);
|
---|
798 | }
|
---|
799 |
|
---|
800 | // It's always nice to be able to clear a table without deallocating it
|
---|
801 | void clear() {
|
---|
802 | if ( group )
|
---|
803 | free(group);
|
---|
804 | group = NULL;
|
---|
805 | memset(bitmap, 0, sizeof(bitmap));
|
---|
806 | num_buckets = 0;
|
---|
807 | }
|
---|
808 |
|
---|
809 | // Functions that tell you about size. Alas, these aren't so useful
|
---|
810 | // because our table is always fixed size.
|
---|
811 | size_type size() const { return GROUP_SIZE; }
|
---|
812 | size_type max_size() const { return GROUP_SIZE; }
|
---|
813 | bool empty() const { return false; }
|
---|
814 | // We also may want to know how many *used* buckets there are
|
---|
815 | size_type num_nonempty() const { return num_buckets; }
|
---|
816 |
|
---|
817 |
|
---|
818 | // get()/set() are explicitly const/non-const. You can use [] if
|
---|
819 | // you want something that can be either (potentially more expensive).
|
---|
820 | const_reference get(size_type i) const {
|
---|
821 | if ( bmtest(i) ) // bucket i is occupied
|
---|
822 | return group[pos_to_offset(bitmap, i)];
|
---|
823 | else
|
---|
824 | return default_value(); // return the default reference
|
---|
825 | }
|
---|
826 |
|
---|
827 | // TODO(csilvers): make protected + friend
|
---|
828 | reference mutating_get(size_type i) { // fills bucket i before getting
|
---|
829 | if ( !bmtest(i) )
|
---|
830 | set(i, default_value());
|
---|
831 | return group[pos_to_offset(bitmap, i)];
|
---|
832 | }
|
---|
833 |
|
---|
834 | // Syntactic sugar. It's easy to return a const reference. To
|
---|
835 | // return a non-const reference, we need to use the assigner adaptor.
|
---|
836 | const_reference operator[](size_type i) const {
|
---|
837 | return get(i);
|
---|
838 | }
|
---|
839 |
|
---|
840 | element_adaptor operator[](size_type i) {
|
---|
841 | return element_adaptor(this, i);
|
---|
842 | }
|
---|
843 |
|
---|
844 | // This returns a reference to the inserted item (which is a copy of val)
|
---|
845 | reference set(size_type i, const_reference val) {
|
---|
846 | size_type offset = pos_to_offset(bitmap, i); // where we'll find (or insert)
|
---|
847 | if ( !bmtest(i) ) { // make space at group[offset]
|
---|
848 | // We know the realloc and memmove are safe because we ensure
|
---|
849 | // value_type is a Plain Old Data Type (see below)
|
---|
850 | group = (value_type *)
|
---|
851 | realloc_or_die(group, sizeof(*group) * ++num_buckets);
|
---|
852 | memmove(group + offset+1, group + offset,
|
---|
853 | (num_buckets-1 - offset) * sizeof(*group));
|
---|
854 | bmset(i);
|
---|
855 | }
|
---|
856 | // This does the actual inserting. Since we made the array using
|
---|
857 | // malloc, we use "placement new" to just call the constructor.
|
---|
858 | // We really should destruct this later, via group[offset].~value_type(),
|
---|
859 | // every time we free(), but we don't because it's a POD type.
|
---|
860 | new(&group[offset]) value_type(val);
|
---|
861 | return group[offset];
|
---|
862 | }
|
---|
863 |
|
---|
864 | // We let you see if a bucket is non-empty without retrieving it
|
---|
865 | bool test(size_type i) const {
|
---|
866 | return bmtest(i);
|
---|
867 | }
|
---|
868 | bool test(iterator pos) const {
|
---|
869 | return bmtest(pos.pos);
|
---|
870 | }
|
---|
871 |
|
---|
872 | // This takes the specified elements out of the group. This is
|
---|
873 | // "undefining", rather than "clearing".
|
---|
874 | void erase(size_type i) {
|
---|
875 | if ( bmtest(i) ) { // trivial to erase empty bucket
|
---|
876 | size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
|
---|
877 | if ( --num_buckets == 0 ) {
|
---|
878 | free(group);
|
---|
879 | group = NULL;
|
---|
880 | } else {
|
---|
881 | memmove(group + offset, group + offset+1,
|
---|
882 | (num_buckets - offset) * sizeof(*group));
|
---|
883 | group = (value_type *)
|
---|
884 | realloc_or_die(group, sizeof(*group) * num_buckets);
|
---|
885 | }
|
---|
886 | bmclear(i);
|
---|
887 | }
|
---|
888 | }
|
---|
889 |
|
---|
890 | void erase(iterator pos) {
|
---|
891 | erase(pos.pos);
|
---|
892 | }
|
---|
893 |
|
---|
894 | void erase(iterator start, iterator end) {
|
---|
895 | // This could be more efficient, but to do so we'd need to make
|
---|
896 | // bmclear() clear a range of indices. Doesn't seem worth it.
|
---|
897 | for ( ; start != end; ++start )
|
---|
898 | erase(start);
|
---|
899 | }
|
---|
900 |
|
---|
901 |
|
---|
902 | // I/O
|
---|
903 | // We support reading and writing groups to disk. We don't store
|
---|
904 | // the actual array contents (which we don't know how to store),
|
---|
905 | // just the bitmap and size. Meant to be used with table I/O.
|
---|
906 | // Returns true if all was ok
|
---|
907 | bool write_metadata(FILE *fp) const {
|
---|
908 | assert(sizeof(num_buckets) == 2); // we explicitly set to u_int16_t
|
---|
909 | PUT_(num_buckets, 8);
|
---|
910 | PUT_(num_buckets, 0);
|
---|
911 | if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) ) return false;
|
---|
912 | return true;
|
---|
913 | }
|
---|
914 |
|
---|
915 | // Reading destroys the old group contents! Returns true if all was ok
|
---|
916 | bool read_metadata(FILE *fp) {
|
---|
917 | clear();
|
---|
918 |
|
---|
919 | int x; // the GET_ macro requires an 'int x' to be defined
|
---|
920 | GET_(num_buckets, 8);
|
---|
921 | GET_(num_buckets, 0);
|
---|
922 |
|
---|
923 | if ( !fread(bitmap, sizeof(bitmap), 1, fp) ) return false;
|
---|
924 |
|
---|
925 | // We'll allocate the space; you'd better fill it somehow!
|
---|
926 | group = (value_type *)malloc(num_buckets * sizeof(*group));
|
---|
927 | if ( group == NULL ) return false;
|
---|
928 | return true;
|
---|
929 | }
|
---|
930 |
|
---|
931 | // If your keys and values are simple enough, we can write them
|
---|
932 | // to disk for you. "simple enough" means no pointers.
|
---|
933 | // However, we don't try to normalize endianness
|
---|
934 | bool write_nopointer_data(FILE *fp) const {
|
---|
935 | for ( const_nonempty_iterator it = nonempty_begin();
|
---|
936 | it != nonempty_end(); ++it ) {
|
---|
937 | if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
|
---|
938 | }
|
---|
939 | return true;
|
---|
940 | }
|
---|
941 |
|
---|
942 | // When reading, we have to override the potential const-ness of *it
|
---|
943 | bool read_nopointer_data(FILE *fp) {
|
---|
944 | for ( nonempty_iterator it = nonempty_begin();
|
---|
945 | it != nonempty_end(); ++it ) {
|
---|
946 | if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
|
---|
947 | return false;
|
---|
948 | }
|
---|
949 | return true;
|
---|
950 | }
|
---|
951 |
|
---|
952 | // Comparisons. Note the comparisons are pretty arbitrary: we
|
---|
953 | // compare values of the first index that isn't equal (using default
|
---|
954 | // value for empty buckets).
|
---|
955 | bool operator==(const sparsegroup& x) const {
|
---|
956 | return ( num_buckets == x.num_buckets &&
|
---|
957 | memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
|
---|
958 | STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm
|
---|
959 | }
|
---|
960 | bool operator<(const sparsegroup& x) const { // also from algorithm
|
---|
961 | return STL_NAMESPACE::lexicographical_compare(begin(), end(),
|
---|
962 | x.begin(), x.end());
|
---|
963 | }
|
---|
964 | bool operator!=(const sparsegroup& x) const { return !(*this == x); }
|
---|
965 | bool operator<=(const sparsegroup& x) const { return *this < x || *this == x; }
|
---|
966 | bool operator>(const sparsegroup& x) const { return x <= *this; }
|
---|
967 | bool operator>=(const sparsegroup& x) const { return x < *this; }
|
---|
968 |
|
---|
969 | private:
|
---|
970 | // We need to enforce that our value_type is a Plain Old Data Type
|
---|
971 | // (so we know realloc and memmove will work). We use traits to
|
---|
972 | // enforce this. The following gives a compile-time error if
|
---|
973 | // is_POD_type is false (which is the default for user types).
|
---|
974 | //
|
---|
975 | // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
|
---|
976 | // and if so tell the compiler via code similar to this:
|
---|
977 | // template<> struct __type_traits<classname> {
|
---|
978 | // typedef __true_type has_trivial_default_constructor;
|
---|
979 | // typedef __true_type has_trivial_copy_constructor;
|
---|
980 | // typedef __true_type has_trivial_assignment_operator;
|
---|
981 | // typedef __true_type has_trivial_destructor;
|
---|
982 | // typedef __true_type is_POD_type;
|
---|
983 | // };
|
---|
984 | //
|
---|
985 | // If this is part of a hash_map, you need to make sure both the
|
---|
986 | // Key and Value types are POD types, if they're user-defined.
|
---|
987 | #ifdef UNDERSTANDS_TYPE_TRAITS
|
---|
988 | static __true_type * const enforce_pod;
|
---|
989 | #endif
|
---|
990 |
|
---|
991 | // The actual data
|
---|
992 | value_type *group; // (small) array of T's
|
---|
993 | unsigned char bitmap[(GROUP_SIZE-1)/8 + 1]; // fancy math is so we round up
|
---|
994 | size_type num_buckets; // limits GROUP_SIZE to 64K
|
---|
995 | };
|
---|
996 |
|
---|
997 | // We need a global swap as well
|
---|
998 | template <class T, uint16_t GROUP_SIZE>
|
---|
999 | inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) {
|
---|
1000 | x.swap(y);
|
---|
1001 | }
|
---|
1002 |
|
---|
1003 | #ifdef UNDERSTANDS_TYPE_TRAITS
|
---|
1004 | template <class T, uint16_t GROUP_SIZE>
|
---|
1005 | __true_type * const sparsegroup<T, GROUP_SIZE>::enforce_pod =
|
---|
1006 | static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
|
---|
1007 | #endif
|
---|
1008 |
|
---|
1009 | // ---------------------------------------------------------------------------
|
---|
1010 |
|
---|
1011 |
|
---|
1012 | template <class T, uint16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
|
---|
1013 | class sparsetable {
|
---|
1014 | public:
|
---|
1015 | // Basic types
|
---|
1016 | typedef T value_type; // stolen from stl_vector.h
|
---|
1017 | typedef value_type* pointer;
|
---|
1018 | typedef const value_type* const_pointer;
|
---|
1019 | typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
|
---|
1020 | typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
|
---|
1021 | typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
|
---|
1022 | typedef value_type &reference;
|
---|
1023 | typedef const value_type &const_reference;
|
---|
1024 | typedef size_t size_type;
|
---|
1025 | typedef ptrdiff_t difference_type;
|
---|
1026 | typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
|
---|
1027 | typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
|
---|
1028 |
|
---|
1029 | // These are our special iterators, that go over non-empty buckets in a
|
---|
1030 | // table. These aren't const only because you can change non-empty bcks.
|
---|
1031 | typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
|
---|
1032 | nonempty_iterator;
|
---|
1033 | typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
|
---|
1034 | const_nonempty_iterator;
|
---|
1035 | typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
|
---|
1036 | typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
|
---|
1037 | // Another special iterator: it frees memory as it iterates (used to resize)
|
---|
1038 | typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
|
---|
1039 | destructive_iterator;
|
---|
1040 |
|
---|
1041 | // Iterator functions
|
---|
1042 | iterator begin() { return iterator(this, 0); }
|
---|
1043 | const_iterator begin() const { return const_iterator(this, 0); }
|
---|
1044 | iterator end() { return iterator(this, size()); }
|
---|
1045 | const_iterator end() const { return const_iterator(this, size()); }
|
---|
1046 | reverse_iterator rbegin() { return reverse_iterator(end()); }
|
---|
1047 | const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
|
---|
1048 | reverse_iterator rend() { return reverse_iterator(begin()); }
|
---|
1049 | const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }
|
---|
1050 |
|
---|
1051 | // Versions for our special non-empty iterator
|
---|
1052 | nonempty_iterator nonempty_begin() {
|
---|
1053 | return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
|
---|
1054 | }
|
---|
1055 | const_nonempty_iterator nonempty_begin() const {
|
---|
1056 | return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
|
---|
1057 | }
|
---|
1058 | nonempty_iterator nonempty_end() {
|
---|
1059 | return nonempty_iterator(groups.begin(), groups.end(), groups.end());
|
---|
1060 | }
|
---|
1061 | const_nonempty_iterator nonempty_end() const {
|
---|
1062 | return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
|
---|
1063 | }
|
---|
1064 | reverse_nonempty_iterator nonempty_rbegin() {
|
---|
1065 | return reverse_nonempty_iterator(nonempty_end());
|
---|
1066 | }
|
---|
1067 | const_reverse_nonempty_iterator nonempty_rbegin() const {
|
---|
1068 | return const_reverse_nonempty_iterator(nonempty_end());
|
---|
1069 | }
|
---|
1070 | reverse_nonempty_iterator nonempty_rend() {
|
---|
1071 | return reverse_nonempty_iterator(nonempty_begin());
|
---|
1072 | }
|
---|
1073 | const_reverse_nonempty_iterator nonempty_rend() const {
|
---|
1074 | return const_reverse_nonempty_iterator(nonempty_begin());
|
---|
1075 | }
|
---|
1076 | destructive_iterator destructive_begin() {
|
---|
1077 | return destructive_iterator(groups.begin(), groups.end(), groups.begin());
|
---|
1078 | }
|
---|
1079 | destructive_iterator destructive_end() {
|
---|
1080 | return destructive_iterator(groups.begin(), groups.end(), groups.end());
|
---|
1081 | }
|
---|
1082 |
|
---|
1083 | private:
|
---|
1084 | typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
|
---|
1085 | GroupsReference;
|
---|
1086 | typedef typename
|
---|
1087 | vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
|
---|
1088 | GroupsConstReference;
|
---|
1089 | typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
|
---|
1090 | GroupsIterator;
|
---|
1091 | typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
|
---|
1092 | GroupsConstIterator;
|
---|
1093 |
|
---|
1094 | // How to deal with the proper group
|
---|
1095 | static size_type num_groups(size_type num) { // how many to hold num buckets
|
---|
1096 | return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
|
---|
1097 | }
|
---|
1098 |
|
---|
1099 | size_type pos_in_group(size_type i) const { return i % GROUP_SIZE; }
|
---|
1100 | size_type group_num(size_type i) const { return i / GROUP_SIZE; }
|
---|
1101 | GroupsReference which_group(size_type i) { return groups[group_num(i)]; }
|
---|
1102 | GroupsConstReference which_group(size_type i) const
|
---|
1103 | { return groups[group_num(i)]; }
|
---|
1104 |
|
---|
1105 |
|
---|
1106 | public:
|
---|
1107 | // Constructors -- default, normal (when you specify size), and copy
|
---|
1108 | sparsetable(size_type size = 0)
|
---|
1109 | : groups(num_groups(size)), table_size(size), num_buckets(0) { }
|
---|
1110 | // We'll can get away with using the default copy constructor,
|
---|
1111 | // and default destructor, and hence the default operator=. Huzzah!
|
---|
1112 |
|
---|
1113 | // Many STL algorithms use swap instead of copy constructors
|
---|
1114 | void swap(sparsetable& x) {
|
---|
1115 | STL_NAMESPACE::swap(groups, x.groups);
|
---|
1116 | STL_NAMESPACE::swap(table_size, x.table_size);
|
---|
1117 | STL_NAMESPACE::swap(num_buckets, x.num_buckets);
|
---|
1118 | }
|
---|
1119 |
|
---|
1120 | // It's always nice to be able to clear a table without deallocating it
|
---|
1121 | void clear() {
|
---|
1122 | GroupsIterator group;
|
---|
1123 | for ( group = groups.begin(); group != groups.end(); ++group ) {
|
---|
1124 | group->clear();
|
---|
1125 | }
|
---|
1126 | num_buckets = 0;
|
---|
1127 | }
|
---|
1128 |
|
---|
1129 | // Functions that tell you about size.
|
---|
1130 | // NOTE: empty() is non-intuitive! It does not tell you the number
|
---|
1131 | // of not-empty buckets (use num_nonempty() for that). Instead
|
---|
1132 | // it says whether you've allocated any buckets or not.
|
---|
1133 | size_type size() const { return table_size; }
|
---|
1134 | size_type max_size() const { return size_type(-1); }
|
---|
1135 | bool empty() const { return table_size == 0; }
|
---|
1136 | // We also may want to know how many *used* buckets there are
|
---|
1137 | size_type num_nonempty() const { return num_buckets; }
|
---|
1138 |
|
---|
1139 | // OK, we'll let you resize one of these puppies
|
---|
1140 | void resize(size_type new_size) {
|
---|
1141 | groups.resize(num_groups(new_size));
|
---|
1142 | if ( new_size < table_size) { // lower num_buckets, clear last group
|
---|
1143 | if ( new_size % GROUP_SIZE > 0 ) // need to clear from last group
|
---|
1144 | groups.back().erase(groups.back().begin() + (new_size % GROUP_SIZE),
|
---|
1145 | groups.back().end());
|
---|
1146 | num_buckets = 0; // refigure # of used buckets
|
---|
1147 | GroupsConstIterator group;
|
---|
1148 | for ( group = groups.begin(); group != groups.end(); ++group )
|
---|
1149 | num_buckets += group->num_nonempty();
|
---|
1150 | }
|
---|
1151 | table_size = new_size;
|
---|
1152 | }
|
---|
1153 |
|
---|
1154 |
|
---|
1155 | // We let you see if a bucket is non-empty without retrieving it
|
---|
1156 | bool test(size_type i) const {
|
---|
1157 | return which_group(i).test(pos_in_group(i));
|
---|
1158 | }
|
---|
1159 | bool test(iterator pos) const {
|
---|
1160 | return which_group(pos.pos).test(pos_in_group(pos.pos));
|
---|
1161 | }
|
---|
1162 | bool test(const_iterator pos) const {
|
---|
1163 | return which_group(pos.pos).test(pos_in_group(pos.pos));
|
---|
1164 | }
|
---|
1165 |
|
---|
1166 | // We only return const_references because it's really hard to
|
---|
1167 | // return something settable for empty buckets. Use set() instead.
|
---|
1168 | const_reference get(size_type i) const {
|
---|
1169 | assert(i < table_size);
|
---|
1170 | return which_group(i).get(pos_in_group(i));
|
---|
1171 | }
|
---|
1172 |
|
---|
1173 | // TODO(csilvers): make protected + friend element_adaptor
|
---|
1174 | reference mutating_get(size_type i) { // fills bucket i before getting
|
---|
1175 | assert(i < table_size);
|
---|
1176 | size_type old_numbuckets = which_group(i).num_nonempty();
|
---|
1177 | reference retval = which_group(i).mutating_get(pos_in_group(i));
|
---|
1178 | num_buckets += which_group(i).num_nonempty() - old_numbuckets;
|
---|
1179 | return retval;
|
---|
1180 | }
|
---|
1181 |
|
---|
1182 | // Syntactic sugar. As in sparsegroup, the non-const version is harder
|
---|
1183 | const_reference operator[](size_type i) const {
|
---|
1184 | return get(i);
|
---|
1185 | }
|
---|
1186 |
|
---|
1187 | element_adaptor operator[](size_type i) {
|
---|
1188 | return element_adaptor(this, i);
|
---|
1189 | }
|
---|
1190 |
|
---|
1191 | // Needed for hashtables, gets as a nonempty_iterator. Crashes for empty bcks
|
---|
1192 | const_nonempty_iterator get_iter(size_type i) const {
|
---|
1193 | assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
|
---|
1194 | return const_nonempty_iterator(
|
---|
1195 | groups.begin(), groups.end(),
|
---|
1196 | groups.begin() + group_num(i),
|
---|
1197 | (groups[group_num(i)].nonempty_begin() +
|
---|
1198 | groups[group_num(i)].pos_to_offset(pos_in_group(i))));
|
---|
1199 | }
|
---|
1200 | // For nonempty we can return a non-const version
|
---|
1201 | nonempty_iterator get_iter(size_type i) {
|
---|
1202 | assert(test(i)); // how can a nonempty_iterator point to an empty bucket?
|
---|
1203 | return nonempty_iterator(
|
---|
1204 | groups.begin(), groups.end(),
|
---|
1205 | groups.begin() + group_num(i),
|
---|
1206 | (groups[group_num(i)].nonempty_begin() +
|
---|
1207 | groups[group_num(i)].pos_to_offset(pos_in_group(i))));
|
---|
1208 | }
|
---|
1209 |
|
---|
1210 |
|
---|
1211 | // This returns a reference to the inserted item (which is a copy of val)
|
---|
1212 | // The trick is to figure out whether we're replacing or inserting anew
|
---|
1213 | reference set(size_type i, const_reference val) {
|
---|
1214 | assert(i < table_size);
|
---|
1215 | size_type old_numbuckets = which_group(i).num_nonempty();
|
---|
1216 | reference retval = which_group(i).set(pos_in_group(i), val);
|
---|
1217 | num_buckets += which_group(i).num_nonempty() - old_numbuckets;
|
---|
1218 | return retval;
|
---|
1219 | }
|
---|
1220 |
|
---|
1221 | // This takes the specified elements out of the table. This is
|
---|
1222 | // "undefining", rather than "clearing".
|
---|
1223 | void erase(size_type i) {
|
---|
1224 | assert(i < table_size);
|
---|
1225 | size_type old_numbuckets = which_group(i).num_nonempty();
|
---|
1226 | which_group(i).erase(pos_in_group(i));
|
---|
1227 | num_buckets += which_group(i).num_nonempty() - old_numbuckets;
|
---|
1228 | }
|
---|
1229 |
|
---|
1230 | void erase(iterator pos) {
|
---|
1231 | erase(pos.pos);
|
---|
1232 | }
|
---|
1233 |
|
---|
1234 | void erase(iterator start, iterator end) {
|
---|
1235 | // This could be more efficient, but then we'd need to figure
|
---|
1236 | // out if we spanned groups or not. Doesn't seem worth it.
|
---|
1237 | for ( ; start != end; ++start )
|
---|
1238 | erase(start);
|
---|
1239 | }
|
---|
1240 |
|
---|
1241 |
|
---|
1242 | // We support reading and writing tables to disk. We don't store
|
---|
1243 | // the actual array contents (which we don't know how to store),
|
---|
1244 | // just the groups and sizes. Returns true if all went ok.
|
---|
1245 |
|
---|
1246 | private:
|
---|
1247 | // Every time the disk format changes, this should probably change too
|
---|
1248 | static const unsigned long MAGIC_NUMBER = 0x24687531;
|
---|
1249 |
|
---|
1250 | // Old versions of this code write all data in 32 bits. We need to
|
---|
1251 | // support these files as well as having support for 64-bit systems.
|
---|
1252 | // So we use the following encoding scheme: for values < 2^32-1, we
|
---|
1253 | // store in 4 bytes in big-endian order. For values > 2^32, we
|
---|
1254 | // store 0xFFFFFFF followed by 8 bytes in big-endian order. This
|
---|
1255 | // causes us to mis-read old-version code that stores exactly
|
---|
1256 | // 0xFFFFFFF, but I don't think that is likely to have happened for
|
---|
1257 | // these particular values.
|
---|
1258 | static bool write_32_or_64(FILE* fp, size_type value) {
|
---|
1259 | if ( value < 0xFFFFFFFFULL ) { // fits in 4 bytes
|
---|
1260 | PUT_(value, 24);
|
---|
1261 | PUT_(value, 16);
|
---|
1262 | PUT_(value, 8);
|
---|
1263 | PUT_(value, 0);
|
---|
1264 | } else if ( value == 0xFFFFFFFFUL ) { // special case in 32bit systems
|
---|
1265 | PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); // marker
|
---|
1266 | PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
|
---|
1267 | PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
|
---|
1268 | } else {
|
---|
1269 | PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); // marker
|
---|
1270 | PUT_(value, 56);
|
---|
1271 | PUT_(value, 48);
|
---|
1272 | PUT_(value, 40);
|
---|
1273 | PUT_(value, 32);
|
---|
1274 | PUT_(value, 24);
|
---|
1275 | PUT_(value, 16);
|
---|
1276 | PUT_(value, 8);
|
---|
1277 | PUT_(value, 0);
|
---|
1278 | }
|
---|
1279 | return true;
|
---|
1280 | }
|
---|
1281 |
|
---|
1282 | static bool read_32_or_64(FILE* fp, size_type *value) { // reads into value
|
---|
1283 | size_type first4 = 0;
|
---|
1284 | int x;
|
---|
1285 | GET_(first4, 24);
|
---|
1286 | GET_(first4, 16);
|
---|
1287 | GET_(first4, 8);
|
---|
1288 | GET_(first4, 0);
|
---|
1289 | if ( first4 < 0xFFFFFFFFULL ) {
|
---|
1290 | *value = first4;
|
---|
1291 | } else {
|
---|
1292 | GET_(*value, 56);
|
---|
1293 | GET_(*value, 48);
|
---|
1294 | GET_(*value, 40);
|
---|
1295 | GET_(*value, 32);
|
---|
1296 | GET_(*value, 24);
|
---|
1297 | GET_(*value, 16);
|
---|
1298 | GET_(*value, 8);
|
---|
1299 | GET_(*value, 0);
|
---|
1300 | }
|
---|
1301 | return true;
|
---|
1302 | }
|
---|
1303 |
|
---|
1304 | public:
|
---|
1305 | bool write_metadata(FILE *fp) const {
|
---|
1306 | if ( !write_32_or_64(fp, MAGIC_NUMBER) ) return false;
|
---|
1307 | if ( !write_32_or_64(fp, table_size) ) return false;
|
---|
1308 | if ( !write_32_or_64(fp, num_buckets) ) return false;
|
---|
1309 |
|
---|
1310 | GroupsConstIterator group;
|
---|
1311 | for ( group = groups.begin(); group != groups.end(); ++group )
|
---|
1312 | if ( group->write_metadata(fp) == false ) return false;
|
---|
1313 | return true;
|
---|
1314 | }
|
---|
1315 |
|
---|
1316 | // Reading destroys the old table contents! Returns true if read ok.
|
---|
1317 | bool read_metadata(FILE *fp) {
|
---|
1318 | size_type magic_read = 0;
|
---|
1319 | if ( !read_32_or_64(fp, &magic_read) ) return false;
|
---|
1320 | if ( magic_read != MAGIC_NUMBER ) {
|
---|
1321 | clear(); // just to be consistent
|
---|
1322 | return false;
|
---|
1323 | }
|
---|
1324 |
|
---|
1325 | if ( !read_32_or_64(fp, &table_size) ) return false;
|
---|
1326 | if ( !read_32_or_64(fp, &num_buckets) ) return false;
|
---|
1327 |
|
---|
1328 | resize(table_size); // so the vector's sized ok
|
---|
1329 | GroupsIterator group;
|
---|
1330 | for ( group = groups.begin(); group != groups.end(); ++group )
|
---|
1331 | if ( group->read_metadata(fp) == false ) return false;
|
---|
1332 | return true;
|
---|
1333 | }
|
---|
1334 |
|
---|
1335 | // This code is identical to that for SparseGroup
|
---|
1336 | // If your keys and values are simple enough, we can write them
|
---|
1337 | // to disk for you. "simple enough" means no pointers.
|
---|
1338 | // However, we don't try to normalize endianness
|
---|
1339 | bool write_nopointer_data(FILE *fp) const {
|
---|
1340 | for ( const_nonempty_iterator it = nonempty_begin();
|
---|
1341 | it != nonempty_end(); ++it ) {
|
---|
1342 | if ( !fwrite(&*it, sizeof(*it), 1, fp) ) return false;
|
---|
1343 | }
|
---|
1344 | return true;
|
---|
1345 | }
|
---|
1346 |
|
---|
1347 | // When reading, we have to override the potential const-ness of *it
|
---|
1348 | bool read_nopointer_data(FILE *fp) {
|
---|
1349 | for ( nonempty_iterator it = nonempty_begin();
|
---|
1350 | it != nonempty_end(); ++it ) {
|
---|
1351 | if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
|
---|
1352 | return false;
|
---|
1353 | }
|
---|
1354 | return true;
|
---|
1355 | }
|
---|
1356 |
|
---|
1357 | // Comparisons. Note the comparisons are pretty arbitrary: we
|
---|
1358 | // compare values of the first index that isn't equal (using default
|
---|
1359 | // value for empty buckets).
|
---|
1360 | bool operator==(const sparsetable& x) const {
|
---|
1361 | return ( table_size == x.table_size &&
|
---|
1362 | num_buckets == x.num_buckets &&
|
---|
1363 | groups == x.groups );
|
---|
1364 | }
|
---|
1365 | bool operator<(const sparsetable& x) const { // also from algobase.h
|
---|
1366 | return STL_NAMESPACE::lexicographical_compare(begin(), end(),
|
---|
1367 | x.begin(), x.end());
|
---|
1368 | }
|
---|
1369 | bool operator!=(const sparsetable& x) const { return !(*this == x); }
|
---|
1370 | bool operator<=(const sparsetable& x) const { return *this < x || *this == x; }
|
---|
1371 | bool operator>(const sparsetable& x) const { return x <= *this; }
|
---|
1372 | bool operator>=(const sparsetable& x) const { return x < *this; }
|
---|
1373 |
|
---|
1374 |
|
---|
1375 | private:
|
---|
1376 | // The actual data
|
---|
1377 | vector< sparsegroup<value_type, GROUP_SIZE> > groups; // our list of groups
|
---|
1378 | size_type table_size; // how many buckets they want
|
---|
1379 | size_type num_buckets; // number of non-empty buckets
|
---|
1380 | };
|
---|
1381 |
|
---|
1382 | // We need a global swap as well
|
---|
1383 | template <class T, uint16_t GROUP_SIZE>
|
---|
1384 | inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) {
|
---|
1385 | x.swap(y);
|
---|
1386 | }
|
---|
1387 |
|
---|
1388 | #undef GET_
|
---|
1389 | #undef PUT_
|
---|
1390 |
|
---|
1391 | _END_GOOGLE_NAMESPACE_
|
---|
1392 |
|
---|
1393 | #endif
|
---|