source: GTP/trunk/Lib/Vis/Preprocessing/src/sparsehash/src/google/sparsetable @ 2162

Revision 2162, 57.9 KB checked in by mattausch, 17 years ago (diff)

improved hash performance with google hashmap

Line 
1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
33// A sparsetable is a random container that implements a sparse array,
34// that is, an array that uses very little memory to store unassigned
35// indices (in this case, between 1-2 bits per unassigned index).  For
36// instance, if you allocate an array of size 5 and assign a[2] = <big
37// struct>, then a[2] will take up a lot of memory but a[0], a[1],
38// a[3], and a[4] will not.  Array elements that have a value are
39// called "assigned".  Array elements that have no value yet, or have
40// had their value cleared using erase() or clear(), are called
41// "unassigned".
42//
43// Unassigned values seem to have the default value of T (see below).
44// Nevertheless, there is a difference between an unassigned index and
45// one explicitly assigned the value of T().  The latter is considered
46// assigned.
47//
48// Access to an array element is constant time, as is insertion and
49// deletion.  Insertion and deletion may be fairly slow, however:
50// because of this container's memory economy, each insert and delete
51// causes a memory reallocation.
52//
53// See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html
54// for information about how to use this class.
55
56#ifndef _SPARSETABLE_H_
57#define _SPARSETABLE_H_
58
59#include <google/sparsehash/config.h>
60#include <stdlib.h>             // for malloc/free
61#include <stdio.h>              // to read/write tables
62#if defined HAVE_STDINT_H
63#include <stdint.h>             // to get uint16_t (ISO naming madness)
64#elif defined HAVE_INTTYPES_H
65#include <inttypes.h>           // another place uint16_t might be defined
66#else
67#include <sys/types.h>          // our last best hope
68#endif
69#include <assert.h>             // for bounds checking
70#include <iterator>             // to define reverse_iterator<> for me
71#include <vector>               // a sparsetable is a vector of groups
72
73#if STDC_HEADERS
74#include <string.h>             // for memcpy and mmemmove
75#else
76#if !HAVE_MEMCPY
77#define memcpy(d, s, n)   bcopy ((s), (d), (n))
78#define memmove(d, s, n)  bcopy ((s), (d), (n))
79#endif
80#endif
81
#if !defined HAVE_U_INT16_T && defined HAVE___UINT16
// BUGFIX: the original typedef was missing its terminating semicolon,
// which broke the build whenever this fallback branch was selected.
typedef __uint16 u_int16_t;       // true on vc++7
#endif
85
86_START_GOOGLE_NAMESPACE_
87
88using STL_NAMESPACE::vector;
89
// How many logical buckets one sparsegroup covers.  Smaller groups mean a
// smaller per-group bitmap (faster lookup) and less data to memmove on
// insert, at the price of more groups overall.  Because group::size_type
// is a short, values of the form 32*x + 16 waste the least bitmap space.
static const uint16_t DEFAULT_SPARSEGROUP_SIZE = 48;   // fits in 1.5 words
95
96
97// A NOTE ON ASSIGNING:
98// A sparse table does not actually allocate memory for entries
99// that are not filled.  Because of this, it becomes complicated
100// to have a non-const iterator: we don't know, if the iterator points
101// to a not-filled bucket, whether you plan to fill it with something
102// or whether you plan to read its value (in which case you'll get
103// the default bucket value).  Therefore, while we can define const
104// operations in a pretty 'normal' way, for non-const operations, we
105// define something that returns a helper object with operator= and
106// operator& that allocate a bucket lazily.  We use this for table[]
107// and also for regular table iterators.
108
// Proxy returned by non-const table[] and by mutable iterators.  A sparse
// table cannot hand out a plain reference to an unassigned bucket, so this
// adaptor defers the decision: assigning through it fills the bucket via
// table->set(), reading it converts to the stored (or default) value via
// table->get(), and taking its address materializes the bucket lazily via
// table->mutating_get().
template <class tabletype>
class table_element_adaptor {
 public:
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::reference reference;
  typedef typename tabletype::pointer pointer;

  // Bind to one logical position of one table.
  table_element_adaptor(tabletype *tbl, size_type p)
      : table(tbl), pos(p) { }

  // Writing assigns (and allocates, if needed) the underlying bucket.
  table_element_adaptor& operator=(value_type val) {
    table->set(pos, val);
    return *this;
  }

  // Reading: we masquerade as the stored value.
  operator value_type() { return table->get(pos); }

  // Address-of forces the bucket to exist, then points into it.
  pointer operator&() { return &table->mutating_get(pos); }

 private:
  tabletype* table;   // table we stand in for
  size_type pos;      // logical index inside that table
};
130
131// Our iterator as simple as iterators can be: basically it's just
132// the index into our table.  Dereference, the only complicated
133// thing, we punt to the table class.  This just goes to show how
134// much machinery STL requires to do even the most trivial tasks.
135//
136// By templatizing over tabletype, we have one iterator type which
137// we can use for both sparsetables and sparsebins.  In fact it
138// works on any class that allows size() and operator[] (eg vector),
139// as long as it does the standard STL typedefs too (eg value_type).
140
141template <class tabletype>
142class table_iterator {
143 public:
144  typedef table_iterator iterator;
145
146#ifdef UNDERSTANDS_ITERATOR_TAGS
147  typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
148#endif
149  typedef typename tabletype::value_type value_type;
150  typedef typename tabletype::difference_type difference_type;
151  typedef typename tabletype::size_type size_type;
152  typedef table_element_adaptor<tabletype> reference;
153  typedef table_element_adaptor<tabletype>* pointer;
154
155  // The "real" constructor
156  table_iterator(tabletype *tbl, size_type p)
157    : table(tbl), pos(p) { }
158  // The default constructor, used when I define vars of type table::iterator
159  table_iterator() : table(NULL), pos(0) { }
160  // The copy constructor, for when I say table::iterator foo = tbl.begin()
161  // The default destructor is fine; we don't define one
162  // The default operator= is fine; we don't define one
163
164  // The main thing our iterator does is dereference.  If the table entry
165  // we point to is empty, we return the default value type.
166  // This is the big different function from the const iterator.
167  reference operator*()              {
168    return table_element_adaptor<tabletype>(table, pos);
169  }
170  pointer operator->()               { return &(operator*()); }
171
172  // Helper function to assert things are ok; eg pos is still in range
173  void check() const {
174    assert(table);
175    assert(pos <= table->size());
176  }
177
178  // Arithmetic: we just do arithmetic on pos.  We don't even need to
179  // do bounds checking, since STL doesn't consider that it's job.  :-)
180  iterator& operator+=(size_type t) { pos += t; check(); return *this; }
181  iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
182  iterator& operator++()            { ++pos; check(); return *this; }
183  iterator& operator--()            { --pos; check(); return *this; }
184  iterator operator++(int)          { iterator tmp(*this);     // for x++
185                                      ++pos; check(); return tmp; }
186  iterator operator--(int)          { iterator tmp(*this);     // for x--
187                                      --pos; check(); return tmp; }
188  iterator operator+(difference_type i) const  { iterator tmp(*this);
189                                                 tmp += i; return tmp; }
190  iterator operator-(difference_type i) const  { iterator tmp(*this);
191                                                 tmp -= i; return tmp; }
192  difference_type operator-(iterator it) const {      // for "x = it2 - it"
193    assert(table == it.table);
194    return pos - it.pos;
195  }
196  reference operator[](difference_type n) const {
197    return *(*this + n);            // simple though not totally efficient
198  }
199
200  // Comparisons.
201  bool operator==(const iterator& it) const {
202    return table == it.table && pos == it.pos;
203  }
204  bool operator<(const iterator& it) const {
205    assert(table == it.table);              // life is bad bad bad otherwise
206    return pos < it.pos;
207  }
208  bool operator<=(const iterator& it) const {
209    assert(table == it.table);
210    return pos <= it.pos;
211  }
212  bool operator!=(const iterator& it) const { return !(*this == it); }
213  bool operator>(const iterator& it) const { return it <= *this; }
214  bool operator>=(const iterator& it) const { return it < *this; }
215
216  // Here's the info we actually need to be an iterator
217  tabletype *table;              // so we can dereference and bounds-check
218  size_type pos;                 // index into the table
219};
220
221// support for "3 + iterator" has to be defined outside the class, alas
222template<class T>
223table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
224                            table_iterator<T> it) {
225  return it + i;               // so people can say it2 = 3 + it
226}
227
228template <class tabletype>
229class const_table_iterator {
230 public:
231  typedef table_iterator<tabletype> iterator;
232  typedef const_table_iterator const_iterator;
233
234#ifdef UNDERSTANDS_ITERATOR_TAGS
235  typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
236#endif
237  typedef typename tabletype::value_type value_type;
238  typedef typename tabletype::difference_type difference_type;
239  typedef typename tabletype::size_type size_type;
240  typedef typename tabletype::const_reference reference;  // we're const-only
241  typedef typename tabletype::const_pointer pointer;
242
243  // The "real" constructor
244  const_table_iterator(const tabletype *tbl, size_type p)
245    : table(tbl), pos(p) { }
246  // The default constructor, used when I define vars of type table::iterator
247  const_table_iterator() : table(NULL), pos(0) { }
248  // The copy constructor, for when I say table::iterator foo = tbl.begin()
249  // Also converts normal iterators to const iterators
250  const_table_iterator(const iterator &from)
251    : table(from.table), pos(from.pos) { }
252  // The default destructor is fine; we don't define one
253  // The default operator= is fine; we don't define one
254
255  // The main thing our iterator does is dereference.  If the table entry
256  // we point to is empty, we return the default value type.
257  reference operator*() const       { return (*table)[pos]; }
258  pointer operator->() const        { return &(operator*()); }
259
260  // Helper function to assert things are ok; eg pos is still in range
261  void check() const {
262    assert(table);
263    assert(pos <= table->size());
264  }
265
266  // Arithmetic: we just do arithmetic on pos.  We don't even need to
267  // do bounds checking, since STL doesn't consider that it's job.  :-)
268  const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
269  const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
270  const_iterator& operator++()            { ++pos; check(); return *this; }
271  const_iterator& operator--()            { --pos; check(); return *this; }
272  const_iterator operator++(int)          { const_iterator tmp(*this); // for x++
273                                            ++pos; check(); return tmp; }
274  const_iterator operator--(int)          { const_iterator tmp(*this); // for x--
275                                            --pos; check(); return tmp; }
276  const_iterator operator+(difference_type i) const  { const_iterator tmp(*this);
277                                                       tmp += i; return tmp; }
278  const_iterator operator-(difference_type i) const  { const_iterator tmp(*this);
279                                                       tmp -= i; return tmp; }
280  difference_type operator-(const_iterator it) const {   // for "x = it2 - it"
281    assert(table == it.table);
282    return pos - it.pos;
283  }
284  reference operator[](difference_type n) const {
285    return *(*this + n);            // simple though not totally efficient
286  }
287
288  // Comparisons.
289  bool operator==(const const_iterator& it) const {
290    return table == it.table && pos == it.pos;
291  }
292  bool operator<(const const_iterator& it) const {
293    assert(table == it.table);              // life is bad bad bad otherwise
294    return pos < it.pos;
295  }
296  bool operator<=(const const_iterator& it) const {
297    assert(table == it.table);
298    return pos <= it.pos;
299  }
300  bool operator!=(const const_iterator& it) const { return !(*this == it); }
301  bool operator>(const const_iterator& it) const { return it <= *this; }
302  bool operator>=(const const_iterator& it) const { return it < *this; }
303
304  // Here's the info we actually need to be an iterator
305  const tabletype *table;        // so we can dereference and bounds-check
306  size_type pos;                 // index into the table
307};
308
309// support for "3 + iterator" has to be defined outside the class, alas
310template<class T>
311const_table_iterator<T> operator+(typename
312                                  const_table_iterator<T>::difference_type i,
313                                  const_table_iterator<T> it) {
314  return it + i;               // so people can say it2 = 3 + it
315}
316
317
318// ---------------------------------------------------------------------------
319
320
321/*
322// This is a 2-D iterator.  You specify a begin and end over a list
323// of *containers*.  We iterate over each container by iterating over
324// it.  It's actually simple:                                   
325// VECTOR.begin() VECTOR[0].begin()  --------> VECTOR[0].end() ---\
326//     |          ________________________________________________/
327//     |          \_> VECTOR[1].begin()  -------->  VECTOR[1].end() -\
328//     |          ___________________________________________________/
329//     v          \_> ......
330// VECTOR.end()
331//
332// It's impossible to do random access on one of these things in constant
333// time, so it's just a bidirectional iterator.
334//
335// Unfortunately, because we need to use this for a non-empty iterator,
336// we use nonempty_begin() and nonempty_end() instead of begin() and end()
337// (though only going across, not down).
338*/
339
340#define TWOD_BEGIN_      nonempty_begin
341#define TWOD_END_        nonempty_end
342#define TWOD_ITER_       nonempty_iterator
343#define TWOD_CONST_ITER_ const_nonempty_iterator
344
// 2-D iterator: walks a container of containers, visiting each row's
// TWOD_BEGIN_()..TWOD_END_() range in turn.  Random access in constant
// time is impossible across rows, so it's only bidirectional.
template <class containertype>
class two_d_iterator {
 public:
  typedef two_d_iterator iterator;

#ifdef UNDERSTANDS_ITERATOR_TAGS
  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
#endif
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor.  begin and end bound the rows; curr is the
  // row we start in.  We always iterate each row completely.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr)
      : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if (row_current != row_end) {
      col_current = row_current->TWOD_BEGIN_();
      advance_past_end();               // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary column within the current row, if you want.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr,
                 typename containertype::value_type::TWOD_ITER_ col)
      : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  // The default constructor, for vars of type table::iterator.
  two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
  // The default destructor / operator= are fine.

  // Happy dereferencer
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  // Skip over empty rows until col_current points at a real element (or
  // we reach row_end).  NOTE: this is not amortized constant time.
  void advance_past_end() {          // used when col_current points to end()
    while (col_current == row_current->TWOD_END_()) {   // end of current row
      ++row_current;                  // hop to the start of the next row
      if (row_current == row_end)
        break;                        // never walk past the last row
      col_current = row_current->TWOD_BEGIN_();
    }
  }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator& operator--() {
    // Back up over empty rows (and past row fronts) to the previous element.
    while (row_current == row_end ||
           col_current == row_current->TWOD_BEGIN_()) {
      assert(row_current != row_begin);
      --row_current;
      col_current = row_current->TWOD_END_();       // one past the target
    }
    --col_current;
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }

  // Comparisons.  At row_end the column iterator is meaningless, so two
  // end iterators compare equal regardless of their columns.
  bool operator==(const iterator& it) const {
    if (row_begin != it.row_begin || row_end != it.row_end ||
        row_current != it.row_current)
      return false;
    return row_current == row_end || col_current == it.col_current;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }

  // Here's the info we actually need to be an iterator
  // These need to be public so we convert from iterator to const_iterator
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;
};
436
437// The same thing again, but this time const.  :-(
438template <class containertype>
439class const_two_d_iterator {
440 public:
441  typedef const_two_d_iterator iterator;
442
443#ifdef UNDERSTANDS_ITERATOR_TAGS
444  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
445#endif
446  // apparently some versions of VC++ have trouble with two ::'s in a typename
447  typedef typename containertype::value_type _tmp_vt;
448  typedef typename _tmp_vt::value_type value_type;
449  typedef typename _tmp_vt::difference_type difference_type;
450  typedef typename _tmp_vt::const_reference reference;
451  typedef typename _tmp_vt::const_pointer pointer;
452
453  const_two_d_iterator(typename containertype::const_iterator begin,
454                       typename containertype::const_iterator end,
455                       typename containertype::const_iterator curr)
456    : row_begin(begin), row_end(end), row_current(curr), col_current() {
457    if ( curr != end ) {
458      col_current = curr->TWOD_BEGIN_();
459      advance_past_end();                 // in case cur->begin() == cur->end()
460    }
461  }
462  const_two_d_iterator(typename containertype::const_iterator begin,
463                       typename containertype::const_iterator end,
464                       typename containertype::const_iterator curr,
465                       typename containertype::value_type::TWOD_CONST_ITER_ col)
466    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
467    advance_past_end();                 // in case cur->begin() == cur->end()
468  }
469  const_two_d_iterator()
470    : row_begin(), row_end(), row_current(), col_current() {
471  }
472  // Need this explicitly so we can convert normal iterators to const iterators
473  const_two_d_iterator(const two_d_iterator<containertype>& it) :
474    row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
475    col_current(it.col_current) { }
476
477  typename containertype::const_iterator row_begin, row_end, row_current;
478  typename containertype::value_type::TWOD_CONST_ITER_ col_current;
479
480
481  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
482  reference operator*() const    { return *col_current; }
483  pointer operator->() const     { return &(operator*()); }
484
485  void advance_past_end() {          // used when col_current points to end()
486    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
487      ++row_current;                                // go to beginning of next
488      if ( row_current != row_end )                 // col is irrelevant at end
489        col_current = row_current->TWOD_BEGIN_();         
490      else                                         
491        break;                                      // don't go past row_end
492    }
493  }
494  iterator& operator++() {
495    assert(row_current != row_end);                 // how to ++ from there?
496    ++col_current;
497    advance_past_end();                 // in case col_current is at end()
498    return *this;
499  }
500  iterator& operator--() {
501    while ( row_current == row_end ||
502            col_current == row_current->TWOD_BEGIN_() ) {
503      assert(row_current != row_begin);
504      --row_current;
505      col_current = row_current->TWOD_END_();             // this is 1 too far
506    }
507    --col_current;
508    return *this;
509  }
510  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
511  iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }
512
513  bool operator==(const iterator& it) const {
514    return ( row_begin == it.row_begin &&
515             row_end == it.row_end &&
516             row_current == it.row_current &&
517             (row_current == row_end || col_current == it.col_current) );
518  }
519  bool operator!=(const iterator& it) const { return !(*this == it); }
520};
521
// A third variant, as frugal with memory as possible: each row is
// clear()ed as soon as we finish iterating over it, so by the time we
// reach the end the whole table has been freed.  You can therefore only
// make a single forward pass, which is why it is an input iterator.
template <class containertype>
class destructive_two_d_iterator {
 public:
  typedef destructive_two_d_iterator iterator;

#ifdef UNDERSTANDS_ITERATOR_TAGS
  typedef STL_NAMESPACE::input_iterator_tag iterator_category;
#endif
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor: rows in [begin, end), starting at row curr.
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr)
      : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if (curr != end) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();               // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary column within the current row, if you want.
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr,
                             typename containertype::value_type::TWOD_ITER_ col)
      : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  // Default constructor.
  destructive_two_d_iterator()
      : row_begin(), row_end(), row_current(), col_current() {
  }

  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;

  // This is the part that destroys: finished rows are cleared before we
  // move on to the next one.
  void advance_past_end() {          // used when col_current points to end()
    while (col_current == row_current->TWOD_END_()) {   // end of current row
      row_current->clear();           // the destructive part
      // It would be nice if we could decrement sparsetable->num_buckets here
      ++row_current;                  // hop to the start of the next row
      if (row_current == row_end)
        break;                        // never walk past the last row
      col_current = row_current->TWOD_BEGIN_();
    }
  }

  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }

  // At row_end the column iterator is meaningless, so two end iterators
  // compare equal regardless of their columns.
  bool operator==(const iterator& it) const {
    if (row_begin != it.row_begin || row_end != it.row_end ||
        row_current != it.row_current)
      return false;
    return row_current == row_end || col_current == it.col_current;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
598
599#undef TWOD_BEGIN_
600#undef TWOD_END_
601#undef TWOD_ITER_
602#undef TWOD_CONST_ITER_
603
604
605
606
607// SPARSE-TABLE
608// ------------
609// The idea is that a table with (logically) t buckets is divided
610// into t/M *groups* of M buckets each.  (M is a constant set in
611// GROUP_SIZE for efficiency.)  Each group is stored sparsely.
612// Thus, inserting into the table causes some array to grow, which is
613// slow but still constant time.  Lookup involves doing a
614// logical-position-to-sparse-position lookup, which is also slow but
615// constant time.  The larger M is, the slower these operations are
616// but the less overhead (slightly).
617//
618// To store the sparse array, we store a bitmap B, where B[i] = 1 iff
619// bucket i is non-empty.  Then to look up bucket i we really look up
620// array[# of 1s before i in B].  This is constant time for fixed M.
621//
622// Terminology: the position of an item in the overall table (from
623// 1 .. t) is called its "location."  The logical position in a group
624// (from 1 .. M ) is called its "position."  The actual location in
625// the array (from 1 .. # of non-empty buckets in the group) is
626// called its "offset."
627
// Byte-at-a-time serialization helpers, presumably used by the table's
// file read/write routines further down (they reference a FILE* `fp`, an
// int `x`, and `size_type` from the expanding scope).  The weird mod in
// the offset is entirely to quiet compiler warnings.
#define PUT_(take_from, offset)                                              \
  if ( putc(((take_from) >> ((offset) % (sizeof(take_from)*8))) % 256, fp)   \
       == EOF )                                                              \
    return false

#define GET_(add_to, offset)                            \
  if ((x=getc(fp)) == EOF)                              \
    return false;                                       \
  else                                                  \
    add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8)))
639
640
641template <class T, uint16_t GROUP_SIZE>
642class sparsegroup {
643 public:
644  // Basic types
645  typedef T value_type;
646  typedef value_type* pointer;
647  typedef const value_type* const_pointer;
648  typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
649  typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
650  typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
651  typedef value_type &reference;
652  typedef const value_type &const_reference;
653  typedef uint16_t size_type;                  // max # of buckets
654  typedef int16_t difference_type;
655  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
656  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
657
658  // These are our special iterators, that go over non-empty buckets in a
659  // group.  These aren't const-only because you can change non-empty bcks.
660  typedef pointer nonempty_iterator;
661  typedef const_pointer const_nonempty_iterator;
662  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
663  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
664
  // Iterator functions.  begin()/end() iterate over all GROUP_SIZE
  // positions (empty buckets yield default_value()).
  iterator begin()                      { return iterator(this, 0); }
  const_iterator begin() const          { return const_iterator(this, 0); }
  iterator end()                        { return iterator(this, size()); }
  const_iterator end() const            { return const_iterator(this, size()); }
  reverse_iterator rbegin()             { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend()               { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // We'll have versions for our special non-empty iterator too.
  // These visit only occupied buckets, i.e. the packed element array.
  nonempty_iterator nonempty_begin()             { return group; }
  const_nonempty_iterator nonempty_begin() const { return group; }
  nonempty_iterator nonempty_end()               { return group + num_buckets; }
  const_nonempty_iterator nonempty_end() const   { return group + num_buckets; }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
692
693
694  // This gives us the "default" value to return for an empty bucket.
695  // We just use the default constructor on T, the template type
696  const_reference default_value() const {
697    static value_type defaultval = value_type();
698    return defaultval;
699  }
700
701
 private:
  // We need to do all this bit manipulation, of course.  ick
  // Bucket i's occupancy flag lives in bit (i & 7) of byte (i >> 3).
  static size_type charbit(size_type i)  { return i >> 3; }      // byte holding bit i
  static size_type modbit(size_type i)   { return 1 << (i&7); }  // mask within that byte
  bool bmtest(size_type i) const   { return bitmap[charbit(i)] & modbit(i); }
  void bmset(size_type i)          { bitmap[charbit(i)] |= modbit(i); }
  void bmclear(size_type i)        { bitmap[charbit(i)] &= ~modbit(i); }
709
710  // malloc / realloc that dies if allocation fails
711  void* realloc_or_die(void* ptr, size_t num_bytes) {
712    void* retval = realloc(ptr, num_bytes);
713    if (retval == NULL) {
714      fprintf(stderr, "FATAL ERROR: failed to allocate %d bytes for ptr %p",
715              num_bytes, ptr);
716      exit(1);
717    }
718    return retval;
719  }
720
721  void* malloc_or_die(size_t num_bytes) {
722    return realloc_or_die(NULL, num_bytes);
723  }
724
 public:                         // get_iter() in sparsetable needs it
  // We need a small function that tells us how many set bits there are
  // in positions 0..pos-1 of the bitmap.  It uses a big table.
  // We make it static so templates don't allocate lots of these tables.
  // This maps a bucket position to its index in the packed 'group' array.
  static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
    // We could make these ints.  The tradeoff is size (eg does it overwhelm
    // the cache?) vs efficiency in referencing sub-word-sized array elements
    static const char bits_in[256] = {      // # of bits set in one char
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
    };
    size_type retval = 0;

    // [Note: condition pos > 8 is an optimization; convince yourself we
    // give exactly the same result as if we had pos >= 8 here instead.]
    // (When pos is a multiple of 8 the loop stops with pos == 8, and the
    // final mask (1 << 8) - 1 == 0xFF counts that whole byte -- the same
    // sum, with one fewer iteration and one fewer byte dereferenced.)
    for ( ; pos > 8; pos -= 8 )                    // bm[0..pos/8-1]
      retval += bits_in[*bm++];                    // chars we want *all* bits in
    return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos
  }

  // Convenience overload counting within this group's own bitmap.
  size_type pos_to_offset(size_type pos) const {   // not static but still const
    return pos_to_offset(bitmap, pos);
  }
762
763
 public:
  // Constructors -- default and copy -- and destructor
  sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
  sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
    // Deep-copy the packed element array; an empty group keeps group ==
    // NULL rather than calling malloc(0).  memcpy is fine: value_type is
    // enforced to be POD (see enforce_pod below).
    if ( num_buckets ) {
      group = (value_type *)malloc_or_die(x.num_buckets * sizeof(*group));
      memcpy(group, x.group, x.num_buckets * sizeof(*group));
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
  }
  ~sparsegroup() { free(group); }       // free(NULL) does the right thing
775
  // Operator= is just like the copy constructor, I guess
  sparsegroup &operator=(const sparsegroup& x) {
    if ( &x == this ) return *this;                    // x = x
    if ( x.num_buckets == 0 ) {
      free(group);
      group = NULL;
    } else {
      // realloc reuses our existing storage when it can; plain memcpy
      // is safe because value_type is enforced to be POD.
      group = (value_type *)
              realloc_or_die(group, x.num_buckets * sizeof(*group));
      memcpy(group, x.group, x.num_buckets * sizeof(*group));
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
    num_buckets = x.num_buckets;
    return *this;
  }
791
792  // Many STL algorithms use swap instead of copy constructors
793  void swap(sparsegroup& x) {
794    STL_NAMESPACE::swap(group, x.group);
795    for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
796      STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]);  // swap not defined on arrays
797    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
798  }
799
800  // It's always nice to be able to clear a table without deallocating it
801  void clear() {
802    if ( group )
803      free(group);
804    group = NULL;
805    memset(bitmap, 0, sizeof(bitmap));
806    num_buckets = 0;
807  }
808
  // Functions that tell you about size.  Alas, these aren't so useful
  // because our table is always fixed size: size() counts slots, not
  // occupied buckets, so empty() is always false.
  size_type size() const           { return GROUP_SIZE; }
  size_type max_size() const       { return GROUP_SIZE; }
  bool empty() const               { return false; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const   { return num_buckets; }
816
817
  // get()/set() are explicitly const/non-const.  You can use [] if
  // you want something that can be either (potentially more expensive).
  const_reference get(size_type i) const {
    if ( bmtest(i) )           // bucket i is occupied
      return group[pos_to_offset(bitmap, i)];
    else
      return default_value();  // return the default reference
  }

  // TODO(csilvers): make protected + friend
  // Unlike get(), this guarantees bucket i is occupied afterwards, so
  // the returned reference is writable; may allocate (via set()).
  reference mutating_get(size_type i) {    // fills bucket i before getting
    if ( !bmtest(i) )
      set(i, default_value());
    return group[pos_to_offset(bitmap, i)];
  }
833
  // Syntactic sugar.  It's easy to return a const reference.  To
  // return a non-const reference, we need to use the assigner adaptor.
  const_reference operator[](size_type i) const {
    return get(i);
  }

  // The adaptor defers the read-vs-write decision: assigning through it
  // calls set(i, val); reading from it calls get(i).
  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }
843
  // This returns a reference to the inserted item (which is a copy of val)
  reference set(size_type i, const_reference val) {
    // pos_to_offset counts occupied buckets strictly before i, so it is
    // unaffected by whether bucket i itself is set yet.
    size_type offset = pos_to_offset(bitmap, i);  // where we'll find (or insert)
    if ( !bmtest(i) ) {                           // make space at group[offset]
      // We know the realloc and memmove are safe because we ensure
      // value_type is a Plain Old Data Type (see below)
      group = (value_type *)
              realloc_or_die(group, sizeof(*group) * ++num_buckets);
      memmove(group + offset+1, group + offset,
              (num_buckets-1 - offset) * sizeof(*group));
      bmset(i);
    }
    // This does the actual inserting.  Since we made the array using
    // malloc, we use "placement new" to just call the constructor.
    // We really should destruct this later, via group[offset].~value_type(),
    // every time we free(), but we don't because it's a POD type.
    new(&group[offset]) value_type(val);
    return group[offset];
  }
863
  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return bmtest(i);
  }
  bool test(iterator pos) const {
    return bmtest(pos.pos);
  }
871
  // This takes the specified elements out of the group.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    if ( bmtest(i) ) {                           // trivial to erase empty bucket
      size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
      if ( --num_buckets == 0 ) {
        free(group);
        group = NULL;
      } else {
        // Shift the packed tail down over the erased slot, then shrink.
        // No destructor runs -- safe only because value_type is POD.
        memmove(group + offset, group + offset+1,
                (num_buckets - offset) * sizeof(*group));
        group = (value_type *)
                realloc_or_die(group, sizeof(*group) * num_buckets);
      }
      bmclear(i);
    }
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start, iterator end) {
    // This could be more efficient, but to do so we'd need to make
    // bmclear() clear a range of indices.  Doesn't seem worth it.
    for ( ; start != end; ++start )
      erase(start);
  }
900
901
  // I/O
  // We support reading and writing groups to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the bitmap and size.  Meant to be used with table I/O.
  // Returns true if all was ok
  bool write_metadata(FILE *fp) const {
    assert(sizeof(num_buckets) == 2);     // we explicitly set to u_int16_t
    // PUT_ is a byte-at-a-time output macro defined earlier in this
    // file; the (value, shift) pairs emit num_buckets big-endian --
    // confirm against the macro definition.
    PUT_(num_buckets, 8);
    PUT_(num_buckets, 0);
    if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) )  return false;
    return true;
  }
914
915  // Reading destroys the old group contents!  Returns true if all was ok
916  bool read_metadata(FILE *fp) {
917    clear();
918   
919    int x;          // the GET_ macro requires an 'int x' to be defined
920    GET_(num_buckets, 8);
921    GET_(num_buckets, 0);
922
923    if ( !fread(bitmap, sizeof(bitmap), 1, fp) )  return false;
924
925    // We'll allocate the space; you'd better fill it somehow!
926    group = (value_type *)malloc(num_buckets * sizeof(*group));
927    if ( group == NULL )  return false;
928    return true;
929  }
930
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it.
  // Assumes read_metadata() already ran, so 'group' is allocated and
  // sized to hold exactly num_buckets elements.
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }
951
  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).  The bitmap/num_buckets checks in
  // operator== are a cheap pre-filter before the element-wise compare.
  bool operator==(const sparsegroup& x) const {
    return ( num_buckets == x.num_buckets &&
             memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
             STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm
  }
  bool operator<(const sparsegroup& x) const {      // also from algorithm
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsegroup& x) const { return !(*this == x); }
  bool operator<=(const sparsegroup& x) const { return *this < x || *this == x; }
  bool operator>(const sparsegroup& x) const { return x <= *this; }
  bool operator>=(const sparsegroup& x) const { return x < *this; }
968
 private:
  // We need to enforce that our value_type is a Plain Old Data Type
  // (so we know realloc and memmove will work).  We use traits to
  // enforce this.  The following gives a compile-time error if
  // is_POD_type is false (which is the default for user types).
  //
  // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
  // and if so tell the compiler via code similar to this:
  // template<> struct __type_traits<classname> {
  //   typedef __true_type    has_trivial_default_constructor;
  //   typedef __true_type    has_trivial_copy_constructor;
  //   typedef __true_type    has_trivial_assignment_operator;
  //   typedef __true_type    has_trivial_destructor;
  //   typedef __true_type    is_POD_type;
  // };
  //
  // If this is part of a hash_map, you need to make sure both the
  // Key and Value types are POD types, if they're user-defined.
#ifdef UNDERSTANDS_TYPE_TRAITS
  // Initialized (after the class) from __type_traits<T>::is_POD_type*,
  // which only converts to __true_type* when T is POD.
  static __true_type * const enforce_pod;
#endif

  // The actual data
  value_type *group;                            // (small) array of T's
  unsigned char bitmap[(GROUP_SIZE-1)/8 + 1];   // fancy math is so we round up
  size_type num_buckets;                        // limits GROUP_SIZE to 64K
};
996
// We need a global swap as well, so STL algorithms (and std::swap
// callers) pick up the cheap member swap via argument lookup.
template <class T, uint16_t GROUP_SIZE>
inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) {
  x.swap(y);
}
1002
#ifdef UNDERSTANDS_TYPE_TRAITS
// Out-of-class definition of the POD-enforcement pointer: the
// static_cast only compiles when __type_traits<T>::is_POD_type is
// __true_type, i.e. when T really is a POD type.
template <class T, uint16_t GROUP_SIZE>
__true_type * const sparsegroup<T, GROUP_SIZE>::enforce_pod =
static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
#endif
1008
1009// ---------------------------------------------------------------------------
1010
1011
// A sparsetable is a vector of sparsegroups, presenting one flat
// index space of 'table_size' buckets.  Bucket i lives at position
// (i % GROUP_SIZE) of group (i / GROUP_SIZE).
template <class T, uint16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
class sparsetable {
 public:
  // Basic types
  typedef T value_type;                        // stolen from stl_vector.h
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
  typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
  typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
  typedef value_type &reference;
  typedef const value_type &const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;

  // These are our special iterators, that go over non-empty buckets in a
  // table.  These aren't const only because you can change non-empty bcks.
  typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     nonempty_iterator;
  typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     const_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
  // Another special iterator: it frees memory as it iterates (used to resize)
  typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     destructive_iterator;

  // Iterator functions
  iterator begin()                      { return iterator(this, 0); }
  const_iterator begin() const          { return const_iterator(this, 0); }
  iterator end()                        { return iterator(this, size()); }
  const_iterator end() const            { return const_iterator(this, size()); }
  reverse_iterator rbegin()             { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend()               { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // Versions for our special non-empty iterator: a two-d iterator over
  // (group, within-group) positions that skips empty buckets.
  nonempty_iterator nonempty_begin()             {
    return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
  }
  const_nonempty_iterator nonempty_begin() const {
    return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
  }
  nonempty_iterator nonempty_end() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  const_nonempty_iterator nonempty_end() const {
    return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
  destructive_iterator destructive_begin() {
    return destructive_iterator(groups.begin(), groups.end(), groups.begin());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(groups.begin(), groups.end(), groups.end());
  }

 private:
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
    GroupsReference;
  typedef typename
    vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
    GroupsConstReference;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
    GroupsIterator;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
    GroupsConstIterator;

  // How to deal with the proper group
  static size_type num_groups(size_type num) {   // how many to hold num buckets
    return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
  }

  size_type pos_in_group(size_type i) const { return i % GROUP_SIZE; }
  size_type group_num(size_type i) const    { return i / GROUP_SIZE; }
  GroupsReference which_group(size_type i)  { return groups[group_num(i)]; }
  GroupsConstReference which_group(size_type i) const
                                            { return groups[group_num(i)]; }


 public:
  // Constructors -- default, normal (when you specify size), and copy
  sparsetable(size_type size = 0)
    : groups(num_groups(size)), table_size(size), num_buckets(0) { }
  // We can get away with using the default copy constructor,
  // and default destructor, and hence the default operator=.  Huzzah!

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsetable& x) {
    STL_NAMESPACE::swap(groups, x.groups);
    STL_NAMESPACE::swap(table_size, x.table_size);
    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group ) {
      group->clear();
    }
    num_buckets = 0;
  }

  // Functions that tell you about size.
  // NOTE: empty() is non-intuitive!  It does not tell you the number
  // of not-empty buckets (use num_nonempty() for that).  Instead
  // it says whether you've allocated any buckets or not.
  size_type size() const           { return table_size; }
  size_type max_size() const       { return size_type(-1); }
  bool empty() const               { return table_size == 0; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const   { return num_buckets; }

  // OK, we'll let you resize one of these puppies
  void resize(size_type new_size) {
    groups.resize(num_groups(new_size));
    if ( new_size < table_size) {   // lower num_buckets, clear last group
      // When new_size isn't group-aligned, the (new) last group keeps
      // some buckets and loses the rest; then recount num_buckets from
      // scratch since we don't know how many non-empties were dropped.
      if ( new_size % GROUP_SIZE > 0 )       // need to clear from last group
        groups.back().erase(groups.back().begin() + (new_size % GROUP_SIZE),
                            groups.back().end());
      num_buckets = 0;                       // refigure # of used buckets
      GroupsConstIterator group;
      for ( group = groups.begin(); group != groups.end(); ++group )
        num_buckets += group->num_nonempty();
    }
    table_size = new_size;
  }


  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return which_group(i).test(pos_in_group(i));
  }
  bool test(iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
  bool test(const_iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }

  // We only return const_references because it's really hard to
  // return something settable for empty buckets.  Use set() instead.
  const_reference get(size_type i) const {
    assert(i < table_size);
    return which_group(i).get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend element_adaptor
  reference mutating_get(size_type i) {    // fills bucket i before getting
    assert(i < table_size);
    // num_buckets bookkeeping: the group's non-empty count may grow by
    // one if bucket i was empty; add the delta to the table-wide count.
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).mutating_get(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // Syntactic sugar.  As in sparsegroup, the non-const version is harder
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

  // Needed for hashtables, gets as a nonempty_iterator.  Crashes for empty bcks
  const_nonempty_iterator get_iter(size_type i) const {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return const_nonempty_iterator(
      groups.begin(), groups.end(),
      groups.begin() + group_num(i),
      (groups[group_num(i)].nonempty_begin() +
       groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }
  // For nonempty we can return a non-const version
  nonempty_iterator get_iter(size_type i) {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return nonempty_iterator(
      groups.begin(), groups.end(),
      groups.begin() + group_num(i),
      (groups[group_num(i)].nonempty_begin() +
       groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }


  // This returns a reference to the inserted item (which is a copy of val)
  // The trick is to figure out whether we're replacing or inserting anew
  reference set(size_type i, const_reference val) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).set(pos_in_group(i), val);
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // This takes the specified elements out of the table.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    which_group(i).erase(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start, iterator end) {
    // This could be more efficient, but then we'd need to figure
    // out if we spanned groups or not.  Doesn't seem worth it.
    for ( ; start != end; ++start )
      erase(start);
  }


  // We support reading and writing tables to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the groups and sizes.  Returns true if all went ok.

 private:
  // Every time the disk format changes, this should probably change too
  static const unsigned long MAGIC_NUMBER = 0x24687531;

  // Old versions of this code wrote all data in 32 bits.  We need to
  // support those files as well as 64-bit systems.  So we use the
  // following encoding scheme: values < 2^32-1 are stored in 4 bytes
  // in big-endian order.  Values >= 2^32-1 are stored as the marker
  // 0xFFFFFFFF followed by 8 bytes in big-endian order.  This causes
  // us to mis-read old-version files that stored exactly 0xFFFFFFFF,
  // but I don't think that is likely to have happened for these
  // particular values.  (PUT_/GET_ are byte-at-a-time I/O macros
  // defined earlier in this file.)
  static bool write_32_or_64(FILE* fp, size_type value) {
    if ( value < 0xFFFFFFFFULL ) {        // fits in 4 bytes
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    } else if ( value == 0xFFFFFFFFUL ) {   // special case in 32bit systems
      // Write the marker plus 0x00000000FFFFFFFF byte-by-byte: when
      // size_type is only 32 bits wide, the shifts of >= 32 in the
      // branch below would be undefined behavior.
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
    } else {
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(value, 56);
      PUT_(value, 48);
      PUT_(value, 40);
      PUT_(value, 32);
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    }
    return true;
  }

  static bool read_32_or_64(FILE* fp, size_type *value) {  // reads into value
    size_type first4 = 0;
    int x;                      // scratch required by the GET_ macro
    GET_(first4, 24);
    GET_(first4, 16);
    GET_(first4, 8);
    GET_(first4, 0);
    if ( first4 < 0xFFFFFFFFULL ) {
      *value = first4;
    } else {
      // Saw the marker: the real value follows as 8 big-endian bytes.
      GET_(*value, 56);
      GET_(*value, 48);
      GET_(*value, 40);
      GET_(*value, 32);
      GET_(*value, 24);
      GET_(*value, 16);
      GET_(*value, 8);
      GET_(*value, 0);
    }
    return true;
  }

 public:
  bool write_metadata(FILE *fp) const {
    if ( !write_32_or_64(fp, MAGIC_NUMBER) )  return false;
    if ( !write_32_or_64(fp, table_size) )  return false;
    if ( !write_32_or_64(fp, num_buckets) )  return false;

    GroupsConstIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->write_metadata(fp) == false )  return false;
    return true;
  }

  // Reading destroys the old table contents!  Returns true if read ok.
  bool read_metadata(FILE *fp) {
    size_type magic_read = 0;
    if ( !read_32_or_64(fp, &magic_read) )  return false;
    if ( magic_read != MAGIC_NUMBER ) {
      clear();                        // just to be consistent
      return false;
    }

    if ( !read_32_or_64(fp, &table_size) )  return false;
    if ( !read_32_or_64(fp, &num_buckets) )  return false;

    resize(table_size);                            // so the vector's sized ok
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->read_metadata(fp) == false )  return false;
    return true;
  }

  // This code is identical to that for SparseGroup
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }

  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsetable& x) const {
    return ( table_size == x.table_size &&
             num_buckets == x.num_buckets &&
             groups == x.groups );
  }
  bool operator<(const sparsetable& x) const {      // also from algobase.h
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsetable& x) const { return !(*this == x); }
  bool operator<=(const sparsetable& x) const { return *this < x || *this == x; }
  bool operator>(const sparsetable& x) const { return x <= *this; }
  bool operator>=(const sparsetable& x) const { return x < *this; }


 private:
  // The actual data
  vector< sparsegroup<value_type, GROUP_SIZE> > groups;  // our list of groups
  size_type table_size;                         // how many buckets they want
  size_type num_buckets;                        // number of non-empty buckets
};
1381
// We need a global swap as well, so STL algorithms (and std::swap
// callers) pick up the cheap member swap via argument lookup.
template <class T, uint16_t GROUP_SIZE>
inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) {
  x.swap(y);
}
1387
1388#undef GET_
1389#undef PUT_
1390
1391_END_GOOGLE_NAMESPACE_
1392
1393#endif
Note: See TracBrowser for help on using the repository browser.