source: GTP/trunk/Lib/Vis/Preprocessing/src/sparsehash/src/google/sparsetable @ 2162

Revision 2162, 57.9 KB checked in by mattausch, 17 years ago (diff)

improved hash performance with google hashmap

Line 
1// Copyright (c) 2005, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
30// ---
31// Author: Craig Silverstein
32//
33// A sparsetable is a random container that implements a sparse array,
34// that is, an array that uses very little memory to store unassigned
35// indices (in this case, between 1-2 bits per unassigned index).  For
36// instance, if you allocate an array of size 5 and assign a[2] = <big
37// struct>, then a[2] will take up a lot of memory but a[0], a[1],
38// a[3], and a[4] will not.  Array elements that have a value are
39// called "assigned".  Array elements that have no value yet, or have
40// had their value cleared using erase() or clear(), are called
41// "unassigned".
42//
43// Unassigned values seem to have the default value of T (see below).
44// Nevertheless, there is a difference between an unassigned index and
45// one explicitly assigned the value of T().  The latter is considered
46// assigned.
47//
48// Access to an array element is constant time, as is insertion and
49// deletion.  Insertion and deletion may be fairly slow, however:
50// because of this container's memory economy, each insert and delete
51// causes a memory reallocation.
52//
53// See /usr/(local/)?doc/sparsehash-0.1/sparsetable.html
54// for information about how to use this class.
55
56#ifndef _SPARSETABLE_H_
57#define _SPARSETABLE_H_
58
59#include <google/sparsehash/config.h>
60#include <stdlib.h>             // for malloc/free
61#include <stdio.h>              // to read/write tables
62#if defined HAVE_STDINT_H
63#include <stdint.h>             // to get uint16_t (ISO naming madness)
64#elif defined HAVE_INTTYPES_H
65#include <inttypes.h>           // another place uint16_t might be defined
66#else
67#include <sys/types.h>          // our last best hope
68#endif
69#include <assert.h>             // for bounds checking
70#include <iterator>             // to define reverse_iterator<> for me
71#include <vector>               // a sparsetable is a vector of groups
72
73#if STDC_HEADERS
74#include <string.h>             // for memcpy and mmemmove
75#else
76#if !HAVE_MEMCPY
77#define memcpy(d, s, n)   bcopy ((s), (d), (n))
78#define memmove(d, s, n)  bcopy ((s), (d), (n))
79#endif
80#endif
81
#if !defined HAVE_U_INT16_T && defined HAVE___UINT16
// BUGFIX: the original typedef was missing its terminating semicolon,
// which broke the build whenever this fallback branch was selected.
typedef __uint16 u_int16_t;       // true on vc++7
#endif
85
86_START_GOOGLE_NAMESPACE_
87
88using STL_NAMESPACE::vector;
89
// How many logical buckets one sparsegroup covers.  Smaller groups mean a
// smaller per-group bitmap (faster lookup) and less data to memmove on
// insert, at the price of more groups overall.  Because group::size_type
// is a short, values of the form 32*x + 16 waste the least bitmap space.
static const uint16_t DEFAULT_SPARSEGROUP_SIZE = 48;   // fits in 1.5 words
95
96
97// A NOTE ON ASSIGNING:
98// A sparse table does not actually allocate memory for entries
99// that are not filled.  Because of this, it becomes complicated
100// to have a non-const iterator: we don't know, if the iterator points
101// to a not-filled bucket, whether you plan to fill it with something
102// or whether you plan to read its value (in which case you'll get
103// the default bucket value).  Therefore, while we can define const
104// operations in a pretty 'normal' way, for non-const operations, we
105// define something that returns a helper object with operator= and
106// operator& that allocate a bucket lazily.  We use this for table[]
107// and also for regular table iterators.
108
// Proxy returned by non-const table[] and by mutable iterators.  A sparse
// table cannot hand out a plain reference to an unassigned bucket, so this
// adaptor defers the decision: assigning through it fills the bucket via
// table->set(), reading it converts to the stored (or default) value via
// table->get(), and taking its address materializes the bucket lazily via
// table->mutating_get().
template <class tabletype>
class table_element_adaptor {
 public:
  typedef typename tabletype::value_type value_type;
  typedef typename tabletype::size_type size_type;
  typedef typename tabletype::reference reference;
  typedef typename tabletype::pointer pointer;

  // Bind to one logical position of one table.
  table_element_adaptor(tabletype *tbl, size_type p)
      : table(tbl), pos(p) { }

  // Writing assigns (and allocates, if needed) the underlying bucket.
  table_element_adaptor& operator=(value_type val) {
    table->set(pos, val);
    return *this;
  }

  // Reading: we masquerade as the stored value.
  operator value_type() { return table->get(pos); }

  // Address-of forces the bucket to exist, then points into it.
  pointer operator&() { return &table->mutating_get(pos); }

 private:
  tabletype* table;   // table we stand in for
  size_type pos;      // logical index inside that table
};
130
131// Our iterator as simple as iterators can be: basically it's just
132// the index into our table.  Dereference, the only complicated
133// thing, we punt to the table class.  This just goes to show how
134// much machinery STL requires to do even the most trivial tasks.
135//
136// By templatizing over tabletype, we have one iterator type which
137// we can use for both sparsetables and sparsebins.  In fact it
138// works on any class that allows size() and operator[] (eg vector),
139// as long as it does the standard STL typedefs too (eg value_type).
140
141template <class tabletype>
142class table_iterator {
143 public:
144  typedef table_iterator iterator;
145
146#ifdef UNDERSTANDS_ITERATOR_TAGS
147  typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
148#endif
149  typedef typename tabletype::value_type value_type;
150  typedef typename tabletype::difference_type difference_type;
151  typedef typename tabletype::size_type size_type;
152  typedef table_element_adaptor<tabletype> reference;
153  typedef table_element_adaptor<tabletype>* pointer;
154
155  // The "real" constructor
156  table_iterator(tabletype *tbl, size_type p)
157    : table(tbl), pos(p) { }
158  // The default constructor, used when I define vars of type table::iterator
159  table_iterator() : table(NULL), pos(0) { }
160  // The copy constructor, for when I say table::iterator foo = tbl.begin()
161  // The default destructor is fine; we don't define one
162  // The default operator= is fine; we don't define one
163
164  // The main thing our iterator does is dereference.  If the table entry
165  // we point to is empty, we return the default value type.
166  // This is the big different function from the const iterator.
167  reference operator*()              {
168    return table_element_adaptor<tabletype>(table, pos);
169  }
170  pointer operator->()               { return &(operator*()); }
171
172  // Helper function to assert things are ok; eg pos is still in range
173  void check() const {
174    assert(table);
175    assert(pos <= table->size());
176  }
177
178  // Arithmetic: we just do arithmetic on pos.  We don't even need to
179  // do bounds checking, since STL doesn't consider that it's job.  :-)
180  iterator& operator+=(size_type t) { pos += t; check(); return *this; }
181  iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
182  iterator& operator++()            { ++pos; check(); return *this; }
183  iterator& operator--()            { --pos; check(); return *this; }
184  iterator operator++(int)          { iterator tmp(*this);     // for x++
185                                      ++pos; check(); return tmp; }
186  iterator operator--(int)          { iterator tmp(*this);     // for x--
187                                      --pos; check(); return tmp; }
188  iterator operator+(difference_type i) const  { iterator tmp(*this);
189                                                 tmp += i; return tmp; }
190  iterator operator-(difference_type i) const  { iterator tmp(*this);
191                                                 tmp -= i; return tmp; }
192  difference_type operator-(iterator it) const {      // for "x = it2 - it"
193    assert(table == it.table);
194    return pos - it.pos;
195  }
196  reference operator[](difference_type n) const {
197    return *(*this + n);            // simple though not totally efficient
198  }
199
200  // Comparisons.
201  bool operator==(const iterator& it) const {
202    return table == it.table && pos == it.pos;
203  }
204  bool operator<(const iterator& it) const {
205    assert(table == it.table);              // life is bad bad bad otherwise
206    return pos < it.pos;
207  }
208  bool operator<=(const iterator& it) const {
209    assert(table == it.table);
210    return pos <= it.pos;
211  }
212  bool operator!=(const iterator& it) const { return !(*this == it); }
213  bool operator>(const iterator& it) const { return it <= *this; }
214  bool operator>=(const iterator& it) const { return it < *this; }
215
216  // Here's the info we actually need to be an iterator
217  tabletype *table;              // so we can dereference and bounds-check
218  size_type pos;                 // index into the table
219};
220
221// support for "3 + iterator" has to be defined outside the class, alas
222template<class T>
223table_iterator<T> operator+(typename table_iterator<T>::difference_type i,
224                            table_iterator<T> it) {
225  return it + i;               // so people can say it2 = 3 + it
226}
227
228template <class tabletype>
229class const_table_iterator {
230 public:
231  typedef table_iterator<tabletype> iterator;
232  typedef const_table_iterator const_iterator;
233
234#ifdef UNDERSTANDS_ITERATOR_TAGS
235  typedef STL_NAMESPACE::random_access_iterator_tag iterator_category;
236#endif
237  typedef typename tabletype::value_type value_type;
238  typedef typename tabletype::difference_type difference_type;
239  typedef typename tabletype::size_type size_type;
240  typedef typename tabletype::const_reference reference;  // we're const-only
241  typedef typename tabletype::const_pointer pointer;
242
243  // The "real" constructor
244  const_table_iterator(const tabletype *tbl, size_type p)
245    : table(tbl), pos(p) { }
246  // The default constructor, used when I define vars of type table::iterator
247  const_table_iterator() : table(NULL), pos(0) { }
248  // The copy constructor, for when I say table::iterator foo = tbl.begin()
249  // Also converts normal iterators to const iterators
250  const_table_iterator(const iterator &from)
251    : table(from.table), pos(from.pos) { }
252  // The default destructor is fine; we don't define one
253  // The default operator= is fine; we don't define one
254
255  // The main thing our iterator does is dereference.  If the table entry
256  // we point to is empty, we return the default value type.
257  reference operator*() const       { return (*table)[pos]; }
258  pointer operator->() const        { return &(operator*()); }
259
260  // Helper function to assert things are ok; eg pos is still in range
261  void check() const {
262    assert(table);
263    assert(pos <= table->size());
264  }
265
266  // Arithmetic: we just do arithmetic on pos.  We don't even need to
267  // do bounds checking, since STL doesn't consider that it's job.  :-)
268  const_iterator& operator+=(size_type t) { pos += t; check(); return *this; }
269  const_iterator& operator-=(size_type t) { pos -= t; check(); return *this; }
270  const_iterator& operator++()            { ++pos; check(); return *this; }
271  const_iterator& operator--()            { --pos; check(); return *this; }
272  const_iterator operator++(int)          { const_iterator tmp(*this); // for x++
273                                            ++pos; check(); return tmp; }
274  const_iterator operator--(int)          { const_iterator tmp(*this); // for x--
275                                            --pos; check(); return tmp; }
276  const_iterator operator+(difference_type i) const  { const_iterator tmp(*this);
277                                                       tmp += i; return tmp; }
278  const_iterator operator-(difference_type i) const  { const_iterator tmp(*this);
279                                                       tmp -= i; return tmp; }
280  difference_type operator-(const_iterator it) const {   // for "x = it2 - it"
281    assert(table == it.table);
282    return pos - it.pos;
283  }
284  reference operator[](difference_type n) const {
285    return *(*this + n);            // simple though not totally efficient
286  }
287
288  // Comparisons.
289  bool operator==(const const_iterator& it) const {
290    return table == it.table && pos == it.pos;
291  }
292  bool operator<(const const_iterator& it) const {
293    assert(table == it.table);              // life is bad bad bad otherwise
294    return pos < it.pos;
295  }
296  bool operator<=(const const_iterator& it) const {
297    assert(table == it.table);
298    return pos <= it.pos;
299  }
300  bool operator!=(const const_iterator& it) const { return !(*this == it); }
301  bool operator>(const const_iterator& it) const { return it <= *this; }
302  bool operator>=(const const_iterator& it) const { return it < *this; }
303
304  // Here's the info we actually need to be an iterator
305  const tabletype *table;        // so we can dereference and bounds-check
306  size_type pos;                 // index into the table
307};
308
309// support for "3 + iterator" has to be defined outside the class, alas
310template<class T>
311const_table_iterator<T> operator+(typename
312                                  const_table_iterator<T>::difference_type i,
313                                  const_table_iterator<T> it) {
314  return it + i;               // so people can say it2 = 3 + it
315}
316
317
318// ---------------------------------------------------------------------------
319
320
321/*
322// This is a 2-D iterator.  You specify a begin and end over a list
323// of *containers*.  We iterate over each container by iterating over
324// it.  It's actually simple:                                   
325// VECTOR.begin() VECTOR[0].begin()  --------> VECTOR[0].end() ---\
326//     |          ________________________________________________/
327//     |          \_> VECTOR[1].begin()  -------->  VECTOR[1].end() -\
328//     |          ___________________________________________________/
329//     v          \_> ......
330// VECTOR.end()
331//
332// It's impossible to do random access on one of these things in constant
333// time, so it's just a bidirectional iterator.
334//
335// Unfortunately, because we need to use this for a non-empty iterator,
336// we use nonempty_begin() and nonempty_end() instead of begin() and end()
337// (though only going across, not down).
338*/
339
340#define TWOD_BEGIN_      nonempty_begin
341#define TWOD_END_        nonempty_end
342#define TWOD_ITER_       nonempty_iterator
343#define TWOD_CONST_ITER_ const_nonempty_iterator
344
// 2-D iterator: walks a container of containers, visiting each row's
// TWOD_BEGIN_()..TWOD_END_() range in turn.  Random access in constant
// time is impossible across rows, so it's only bidirectional.
template <class containertype>
class two_d_iterator {
 public:
  typedef two_d_iterator iterator;

#ifdef UNDERSTANDS_ITERATOR_TAGS
  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
#endif
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor.  begin and end bound the rows; curr is the
  // row we start in.  We always iterate each row completely.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr)
      : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if (row_current != row_end) {
      col_current = row_current->TWOD_BEGIN_();
      advance_past_end();               // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary column within the current row, if you want.
  two_d_iterator(typename containertype::iterator begin,
                 typename containertype::iterator end,
                 typename containertype::iterator curr,
                 typename containertype::value_type::TWOD_ITER_ col)
      : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  // The default constructor, for vars of type table::iterator.
  two_d_iterator() : row_begin(), row_end(), row_current(), col_current() { }
  // The default destructor / operator= are fine.

  // Happy dereferencer
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  // Skip over empty rows until col_current points at a real element (or
  // we reach row_end).  NOTE: this is not amortized constant time.
  void advance_past_end() {          // used when col_current points to end()
    while (col_current == row_current->TWOD_END_()) {   // end of current row
      ++row_current;                  // hop to the start of the next row
      if (row_current == row_end)
        break;                        // never walk past the last row
      col_current = row_current->TWOD_BEGIN_();
    }
  }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator& operator--() {
    // Back up over empty rows (and past row fronts) to the previous element.
    while (row_current == row_end ||
           col_current == row_current->TWOD_BEGIN_()) {
      assert(row_current != row_begin);
      --row_current;
      col_current = row_current->TWOD_END_();       // one past the target
    }
    --col_current;
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
  iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }

  // Comparisons.  At row_end the column iterator is meaningless, so two
  // end iterators compare equal regardless of their columns.
  bool operator==(const iterator& it) const {
    if (row_begin != it.row_begin || row_end != it.row_end ||
        row_current != it.row_current)
      return false;
    return row_current == row_end || col_current == it.col_current;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }

  // Here's the info we actually need to be an iterator
  // These need to be public so we convert from iterator to const_iterator
  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;
};
436
437// The same thing again, but this time const.  :-(
438template <class containertype>
439class const_two_d_iterator {
440 public:
441  typedef const_two_d_iterator iterator;
442
443#ifdef UNDERSTANDS_ITERATOR_TAGS
444  typedef STL_NAMESPACE::bidirectional_iterator_tag iterator_category;
445#endif
446  // apparently some versions of VC++ have trouble with two ::'s in a typename
447  typedef typename containertype::value_type _tmp_vt;
448  typedef typename _tmp_vt::value_type value_type;
449  typedef typename _tmp_vt::difference_type difference_type;
450  typedef typename _tmp_vt::const_reference reference;
451  typedef typename _tmp_vt::const_pointer pointer;
452
453  const_two_d_iterator(typename containertype::const_iterator begin,
454                       typename containertype::const_iterator end,
455                       typename containertype::const_iterator curr)
456    : row_begin(begin), row_end(end), row_current(curr), col_current() {
457    if ( curr != end ) {
458      col_current = curr->TWOD_BEGIN_();
459      advance_past_end();                 // in case cur->begin() == cur->end()
460    }
461  }
462  const_two_d_iterator(typename containertype::const_iterator begin,
463                       typename containertype::const_iterator end,
464                       typename containertype::const_iterator curr,
465                       typename containertype::value_type::TWOD_CONST_ITER_ col)
466    : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
467    advance_past_end();                 // in case cur->begin() == cur->end()
468  }
469  const_two_d_iterator()
470    : row_begin(), row_end(), row_current(), col_current() {
471  }
472  // Need this explicitly so we can convert normal iterators to const iterators
473  const_two_d_iterator(const two_d_iterator<containertype>& it) :
474    row_begin(it.row_begin), row_end(it.row_end), row_current(it.row_current),
475    col_current(it.col_current) { }
476
477  typename containertype::const_iterator row_begin, row_end, row_current;
478  typename containertype::value_type::TWOD_CONST_ITER_ col_current;
479
480
481  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE NON-CONST ITERATOR
482  reference operator*() const    { return *col_current; }
483  pointer operator->() const     { return &(operator*()); }
484
485  void advance_past_end() {          // used when col_current points to end()
486    while ( col_current == row_current->TWOD_END_() ) {  // end of current row
487      ++row_current;                                // go to beginning of next
488      if ( row_current != row_end )                 // col is irrelevant at end
489        col_current = row_current->TWOD_BEGIN_();         
490      else                                         
491        break;                                      // don't go past row_end
492    }
493  }
494  iterator& operator++() {
495    assert(row_current != row_end);                 // how to ++ from there?
496    ++col_current;
497    advance_past_end();                 // in case col_current is at end()
498    return *this;
499  }
500  iterator& operator--() {
501    while ( row_current == row_end ||
502            col_current == row_current->TWOD_BEGIN_() ) {
503      assert(row_current != row_begin);
504      --row_current;
505      col_current = row_current->TWOD_END_();             // this is 1 too far
506    }
507    --col_current;
508    return *this;
509  }
510  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }
511  iterator operator--(int)       { iterator tmp(*this); --*this; return tmp; }
512
513  bool operator==(const iterator& it) const {
514    return ( row_begin == it.row_begin &&
515             row_end == it.row_end &&
516             row_current == it.row_current &&
517             (row_current == row_end || col_current == it.col_current) );
518  }
519  bool operator!=(const iterator& it) const { return !(*this == it); }
520};
521
// A third variant, as frugal with memory as possible: each row is
// clear()ed as soon as we finish iterating over it, so by the time we
// reach the end the whole table has been freed.  You can therefore only
// make a single forward pass, which is why it is an input iterator.
template <class containertype>
class destructive_two_d_iterator {
 public:
  typedef destructive_two_d_iterator iterator;

#ifdef UNDERSTANDS_ITERATOR_TAGS
  typedef STL_NAMESPACE::input_iterator_tag iterator_category;
#endif
  // apparently some versions of VC++ have trouble with two ::'s in a typename
  typedef typename containertype::value_type _tmp_vt;
  typedef typename _tmp_vt::value_type value_type;
  typedef typename _tmp_vt::difference_type difference_type;
  typedef typename _tmp_vt::reference reference;
  typedef typename _tmp_vt::pointer pointer;

  // The "real" constructor: rows in [begin, end), starting at row curr.
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr)
      : row_begin(begin), row_end(end), row_current(curr), col_current() {
    if (curr != end) {
      col_current = curr->TWOD_BEGIN_();
      advance_past_end();               // in case cur->begin() == cur->end()
    }
  }
  // Start at an arbitrary column within the current row, if you want.
  destructive_two_d_iterator(typename containertype::iterator begin,
                             typename containertype::iterator end,
                             typename containertype::iterator curr,
                             typename containertype::value_type::TWOD_ITER_ col)
      : row_begin(begin), row_end(end), row_current(curr), col_current(col) {
    advance_past_end();                 // in case cur->begin() == cur->end()
  }
  // Default constructor.
  destructive_two_d_iterator()
      : row_begin(), row_end(), row_current(), col_current() {
  }

  typename containertype::iterator row_begin, row_end, row_current;
  typename containertype::value_type::TWOD_ITER_ col_current;

  // This is the part that destroys: finished rows are cleared before we
  // move on to the next one.
  void advance_past_end() {          // used when col_current points to end()
    while (col_current == row_current->TWOD_END_()) {   // end of current row
      row_current->clear();           // the destructive part
      // It would be nice if we could decrement sparsetable->num_buckets here
      ++row_current;                  // hop to the start of the next row
      if (row_current == row_end)
        break;                        // never walk past the last row
      col_current = row_current->TWOD_BEGIN_();
    }
  }

  // EVERYTHING FROM HERE DOWN IS THE SAME AS THE REGULAR ITERATOR
  reference operator*() const    { return *col_current; }
  pointer operator->() const     { return &(operator*()); }

  iterator& operator++() {
    assert(row_current != row_end);                 // how to ++ from there?
    ++col_current;
    advance_past_end();                 // in case col_current is at end()
    return *this;
  }
  iterator operator++(int)       { iterator tmp(*this); ++*this; return tmp; }

  // At row_end the column iterator is meaningless, so two end iterators
  // compare equal regardless of their columns.
  bool operator==(const iterator& it) const {
    if (row_begin != it.row_begin || row_end != it.row_end ||
        row_current != it.row_current)
      return false;
    return row_current == row_end || col_current == it.col_current;
  }
  bool operator!=(const iterator& it) const { return !(*this == it); }
};
598
599#undef TWOD_BEGIN_
600#undef TWOD_END_
601#undef TWOD_ITER_
602#undef TWOD_CONST_ITER_
603
604
605
606
607// SPARSE-TABLE
608// ------------
609// The idea is that a table with (logically) t buckets is divided
610// into t/M *groups* of M buckets each.  (M is a constant set in
611// GROUP_SIZE for efficiency.)  Each group is stored sparsely.
612// Thus, inserting into the table causes some array to grow, which is
613// slow but still constant time.  Lookup involves doing a
614// logical-position-to-sparse-position lookup, which is also slow but
615// constant time.  The larger M is, the slower these operations are
616// but the less overhead (slightly).
617//
618// To store the sparse array, we store a bitmap B, where B[i] = 1 iff
619// bucket i is non-empty.  Then to look up bucket i we really look up
620// array[# of 1s before i in B].  This is constant time for fixed M.
621//
622// Terminology: the position of an item in the overall table (from
623// 1 .. t) is called its "location."  The logical position in a group
624// (from 1 .. M ) is called its "position."  The actual location in
625// the array (from 1 .. # of non-empty buckets in the group) is
626// called its "offset."
627
// Byte-at-a-time serialization helpers, presumably used by the table's
// file read/write routines further down (they reference a FILE* `fp`, an
// int `x`, and `size_type` from the expanding scope).  The weird mod in
// the offset is entirely to quiet compiler warnings.
#define PUT_(take_from, offset)                                              \
  if ( putc(((take_from) >> ((offset) % (sizeof(take_from)*8))) % 256, fp)   \
       == EOF )                                                              \
    return false

#define GET_(add_to, offset)                            \
  if ((x=getc(fp)) == EOF)                              \
    return false;                                       \
  else                                                  \
    add_to |= (static_cast<size_type>(x) << ((offset) % (sizeof(add_to)*8)))
639
640
641template <class T, uint16_t GROUP_SIZE>
642class sparsegroup {
643 public:
644  // Basic types
645  typedef T value_type;
646  typedef value_type* pointer;
647  typedef const value_type* const_pointer;
648  typedef table_iterator<sparsegroup<T, GROUP_SIZE> > iterator;
649  typedef const_table_iterator<sparsegroup<T, GROUP_SIZE> > const_iterator;
650  typedef table_element_adaptor<sparsegroup<T, GROUP_SIZE> > element_adaptor;
651  typedef value_type &reference;
652  typedef const value_type &const_reference;
653  typedef uint16_t size_type;                  // max # of buckets
654  typedef int16_t difference_type;
655  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
656  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;
657
658  // These are our special iterators, that go over non-empty buckets in a
659  // group.  These aren't const-only because you can change non-empty bcks.
660  typedef pointer nonempty_iterator;
661  typedef const_pointer const_nonempty_iterator;
662  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
663  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
664
  // Iterator functions.  begin()/end() iterate over all GROUP_SIZE
  // positions (empty buckets yield default_value()).
  iterator begin()                      { return iterator(this, 0); }
  const_iterator begin() const          { return const_iterator(this, 0); }
  iterator end()                        { return iterator(this, size()); }
  const_iterator end() const            { return const_iterator(this, size()); }
  reverse_iterator rbegin()             { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend()               { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // We'll have versions for our special non-empty iterator too.
  // These visit only occupied buckets, i.e. the packed element array.
  nonempty_iterator nonempty_begin()             { return group; }
  const_nonempty_iterator nonempty_begin() const { return group; }
  nonempty_iterator nonempty_end()               { return group + num_buckets; }
  const_nonempty_iterator nonempty_end() const   { return group + num_buckets; }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
692
693
694  // This gives us the "default" value to return for an empty bucket.
695  // We just use the default constructor on T, the template type
696  const_reference default_value() const {
697    static value_type defaultval = value_type();
698    return defaultval;
699  }
700
701
 private:
  // We need to do all this bit manipulation, of course.  ick
  // Bucket i's occupancy flag lives in bit (i & 7) of byte (i >> 3).
  static size_type charbit(size_type i)  { return i >> 3; }      // byte holding bit i
  static size_type modbit(size_type i)   { return 1 << (i&7); }  // mask within that byte
  bool bmtest(size_type i) const   { return bitmap[charbit(i)] & modbit(i); }
  void bmset(size_type i)          { bitmap[charbit(i)] |= modbit(i); }
  void bmclear(size_type i)        { bitmap[charbit(i)] &= ~modbit(i); }
709
710  // malloc / realloc that dies if allocation fails
711  void* realloc_or_die(void* ptr, size_t num_bytes) {
712    void* retval = realloc(ptr, num_bytes);
713    if (retval == NULL) {
714      fprintf(stderr, "FATAL ERROR: failed to allocate %d bytes for ptr %p",
715              num_bytes, ptr);
716      exit(1);
717    }
718    return retval;
719  }
720
721  void* malloc_or_die(size_t num_bytes) {
722    return realloc_or_die(NULL, num_bytes);
723  }
724
 public:                         // get_iter() in sparsetable needs it
  // We need a small function that tells us how many set bits there are
  // in positions 0..pos-1 of the bitmap.  It uses a big table.
  // We make it static so templates don't allocate lots of these tables.
  // This maps a bucket position to its index in the packed 'group' array.
  static size_type pos_to_offset(const unsigned char *bm, size_type pos) {
    // We could make these ints.  The tradeoff is size (eg does it overwhelm
    // the cache?) vs efficiency in referencing sub-word-sized array elements
    static const char bits_in[256] = {      // # of bits set in one char
      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
    };
    size_type retval = 0;

    // [Note: condition pos > 8 is an optimization; convince yourself we
    // give exactly the same result as if we had pos >= 8 here instead.]
    // (When pos is a multiple of 8 the loop stops with pos == 8, and the
    // final mask (1 << 8) - 1 == 0xFF counts that whole byte -- the same
    // sum, with one fewer iteration and one fewer byte dereferenced.)
    for ( ; pos > 8; pos -= 8 )                    // bm[0..pos/8-1]
      retval += bits_in[*bm++];                    // chars we want *all* bits in
    return retval + bits_in[*bm & ((1 << pos)-1)]; // the char that includes pos
  }

  // Convenience overload counting within this group's own bitmap.
  size_type pos_to_offset(size_type pos) const {   // not static but still const
    return pos_to_offset(bitmap, pos);
  }
762
763
 public:
  // Constructors -- default and copy -- and destructor
  sparsegroup() : group(0), num_buckets(0) { memset(bitmap, 0, sizeof(bitmap)); }
  sparsegroup(const sparsegroup& x) : group(0), num_buckets(x.num_buckets) {
    // Deep-copy the packed element array; an empty group keeps group ==
    // NULL rather than calling malloc(0).  memcpy is fine: value_type is
    // enforced to be POD (see enforce_pod below).
    if ( num_buckets ) {
      group = (value_type *)malloc_or_die(x.num_buckets * sizeof(*group));
      memcpy(group, x.group, x.num_buckets * sizeof(*group));
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
  }
  ~sparsegroup() { free(group); }       // free(NULL) does the right thing
775
  // Operator= is just like the copy constructor, I guess
  sparsegroup &operator=(const sparsegroup& x) {
    if ( &x == this ) return *this;                    // x = x
    if ( x.num_buckets == 0 ) {
      free(group);
      group = NULL;
    } else {
      // realloc reuses our existing storage when it can; plain memcpy
      // is safe because value_type is enforced to be POD.
      group = (value_type *)
              realloc_or_die(group, x.num_buckets * sizeof(*group));
      memcpy(group, x.group, x.num_buckets * sizeof(*group));
    }
    memcpy(bitmap, x.bitmap, sizeof(bitmap));
    num_buckets = x.num_buckets;
    return *this;
  }
791
792  // Many STL algorithms use swap instead of copy constructors
793  void swap(sparsegroup& x) {
794    STL_NAMESPACE::swap(group, x.group);
795    for ( int i = 0; i < sizeof(bitmap) / sizeof(*bitmap); ++i )
796      STL_NAMESPACE::swap(bitmap[i], x.bitmap[i]);  // swap not defined on arrays
797    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
798  }
799
800  // It's always nice to be able to clear a table without deallocating it
801  void clear() {
802    if ( group )
803      free(group);
804    group = NULL;
805    memset(bitmap, 0, sizeof(bitmap));
806    num_buckets = 0;
807  }
808
  // Functions that tell you about size.  Alas, these aren't so useful
  // because our table is always fixed size: size() counts slots, not
  // occupied buckets, so empty() is always false.
  size_type size() const           { return GROUP_SIZE; }
  size_type max_size() const       { return GROUP_SIZE; }
  bool empty() const               { return false; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const   { return num_buckets; }
816
817
  // get()/set() are explicitly const/non-const.  You can use [] if
  // you want something that can be either (potentially more expensive).
  const_reference get(size_type i) const {
    if ( bmtest(i) )           // bucket i is occupied
      return group[pos_to_offset(bitmap, i)];
    else
      return default_value();  // return the default reference
  }

  // TODO(csilvers): make protected + friend
  // Unlike get(), this guarantees bucket i is occupied afterwards, so
  // the returned reference is writable; may allocate (via set()).
  reference mutating_get(size_type i) {    // fills bucket i before getting
    if ( !bmtest(i) )
      set(i, default_value());
    return group[pos_to_offset(bitmap, i)];
  }
833
  // Syntactic sugar.  It's easy to return a const reference.  To
  // return a non-const reference, we need to use the assigner adaptor.
  const_reference operator[](size_type i) const {
    return get(i);
  }

  // The adaptor defers the read-vs-write decision: assigning through it
  // calls set(i, val); reading from it calls get(i).
  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }
843
  // This returns a reference to the inserted item (which is a copy of val)
  reference set(size_type i, const_reference val) {
    // pos_to_offset counts occupied buckets strictly before i, so it is
    // unaffected by whether bucket i itself is set yet.
    size_type offset = pos_to_offset(bitmap, i);  // where we'll find (or insert)
    if ( !bmtest(i) ) {                           // make space at group[offset]
      // We know the realloc and memmove are safe because we ensure
      // value_type is a Plain Old Data Type (see below)
      group = (value_type *)
              realloc_or_die(group, sizeof(*group) * ++num_buckets);
      memmove(group + offset+1, group + offset,
              (num_buckets-1 - offset) * sizeof(*group));
      bmset(i);
    }
    // This does the actual inserting.  Since we made the array using
    // malloc, we use "placement new" to just call the constructor.
    // We really should destruct this later, via group[offset].~value_type(),
    // every time we free(), but we don't because it's a POD type.
    new(&group[offset]) value_type(val);
    return group[offset];
  }
863
  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return bmtest(i);
  }
  bool test(iterator pos) const {
    return bmtest(pos.pos);
  }
871
  // This takes the specified elements out of the group.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    if ( bmtest(i) ) {                           // trivial to erase empty bucket
      size_type offset = pos_to_offset(bitmap,i); // where we'll find (or insert)
      if ( --num_buckets == 0 ) {
        free(group);
        group = NULL;
      } else {
        // Shift the packed tail down over the erased slot, then shrink.
        // No destructor runs -- safe only because value_type is POD.
        memmove(group + offset, group + offset+1,
                (num_buckets - offset) * sizeof(*group));
        group = (value_type *)
                realloc_or_die(group, sizeof(*group) * num_buckets);
      }
      bmclear(i);
    }
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start, iterator end) {
    // This could be more efficient, but to do so we'd need to make
    // bmclear() clear a range of indices.  Doesn't seem worth it.
    for ( ; start != end; ++start )
      erase(start);
  }
900
901
  // I/O
  // We support reading and writing groups to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the bitmap and size.  Meant to be used with table I/O.
  // Returns true if all was ok
  bool write_metadata(FILE *fp) const {
    assert(sizeof(num_buckets) == 2);     // we explicitly set to u_int16_t
    // PUT_ is a byte-at-a-time output macro defined earlier in this
    // file; the (value, shift) pairs emit num_buckets big-endian --
    // confirm against the macro definition.
    PUT_(num_buckets, 8);
    PUT_(num_buckets, 0);
    if ( !fwrite(bitmap, sizeof(bitmap), 1, fp) )  return false;
    return true;
  }
914
915  // Reading destroys the old group contents!  Returns true if all was ok
916  bool read_metadata(FILE *fp) {
917    clear();
918   
919    int x;          // the GET_ macro requires an 'int x' to be defined
920    GET_(num_buckets, 8);
921    GET_(num_buckets, 0);
922
923    if ( !fread(bitmap, sizeof(bitmap), 1, fp) )  return false;
924
925    // We'll allocate the space; you'd better fill it somehow!
926    group = (value_type *)malloc(num_buckets * sizeof(*group));
927    if ( group == NULL )  return false;
928    return true;
929  }
930
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it.
  // Assumes read_metadata() already ran, so 'group' is allocated and
  // sized to hold exactly num_buckets elements.
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }
951
  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).  The bitmap/num_buckets checks in
  // operator== are a cheap pre-filter before the element-wise compare.
  bool operator==(const sparsegroup& x) const {
    return ( num_buckets == x.num_buckets &&
             memcmp(bitmap, x.bitmap, sizeof(bitmap)) == 0 &&
             STL_NAMESPACE::equal(begin(), end(), x.begin()) ); // from algorithm
  }
  bool operator<(const sparsegroup& x) const {      // also from algorithm
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsegroup& x) const { return !(*this == x); }
  bool operator<=(const sparsegroup& x) const { return *this < x || *this == x; }
  bool operator>(const sparsegroup& x) const { return x <= *this; }
  bool operator>=(const sparsegroup& x) const { return x < *this; }
968
 private:
  // We need to enforce that our value_type is a Plain Old Data Type
  // (so we know realloc and memmove will work).  We use traits to
  // enforce this.  The following gives a compile-time error if
  // is_POD_type is false (which is the default for user types).
  //
  // IF YOU GET AN ERROR HERE, make sure your class is a POD type,
  // and if so tell the compiler via code similar to this:
  // template<> struct __type_traits<classname> {
  //   typedef __true_type    has_trivial_default_constructor;
  //   typedef __true_type    has_trivial_copy_constructor;
  //   typedef __true_type    has_trivial_assignment_operator;
  //   typedef __true_type    has_trivial_destructor;
  //   typedef __true_type    is_POD_type;
  // };
  //
  // If this is part of a hash_map, you need to make sure both the
  // Key and Value types are POD types, if they're user-defined.
#ifdef UNDERSTANDS_TYPE_TRAITS
  // Initialized (after the class) from __type_traits<T>::is_POD_type*,
  // which only converts to __true_type* when T is POD.
  static __true_type * const enforce_pod;
#endif

  // The actual data
  value_type *group;                            // (small) array of T's
  unsigned char bitmap[(GROUP_SIZE-1)/8 + 1];   // fancy math is so we round up
  size_type num_buckets;                        // limits GROUP_SIZE to 64K
};
996
// We need a global swap as well, so STL algorithms (and std::swap
// callers) pick up the cheap member swap via argument lookup.
template <class T, uint16_t GROUP_SIZE>
inline void swap(sparsegroup<T,GROUP_SIZE> &x, sparsegroup<T,GROUP_SIZE> &y) {
  x.swap(y);
}
1002
#ifdef UNDERSTANDS_TYPE_TRAITS
// Out-of-class definition of the POD-enforcement pointer: the
// static_cast only compiles when __type_traits<T>::is_POD_type is
// __true_type, i.e. when T really is a POD type.
template <class T, uint16_t GROUP_SIZE>
__true_type * const sparsegroup<T, GROUP_SIZE>::enforce_pod =
static_cast<typename __type_traits<value_type>::is_POD_type *>(0);
#endif
1008
1009// ---------------------------------------------------------------------------
1010
1011
// A sparsetable is a vector of sparsegroups, presenting one flat
// index space of 'table_size' buckets.  Bucket i lives at position
// (i % GROUP_SIZE) of group (i / GROUP_SIZE).
template <class T, uint16_t GROUP_SIZE = DEFAULT_SPARSEGROUP_SIZE>
class sparsetable {
 public:
  // Basic types
  typedef T value_type;                        // stolen from stl_vector.h
  typedef value_type* pointer;
  typedef const value_type* const_pointer;
  typedef table_iterator<sparsetable<T, GROUP_SIZE> > iterator;
  typedef const_table_iterator<sparsetable<T, GROUP_SIZE> > const_iterator;
  typedef table_element_adaptor<sparsetable<T, GROUP_SIZE> > element_adaptor;
  typedef value_type &reference;
  typedef const value_type &const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  typedef STL_NAMESPACE::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef STL_NAMESPACE::reverse_iterator<iterator> reverse_iterator;

  // These are our special iterators, that go over non-empty buckets in a
  // table.  These aren't const only because you can change non-empty bcks.
  typedef two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     nonempty_iterator;
  typedef const_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     const_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<nonempty_iterator> reverse_nonempty_iterator;
  typedef STL_NAMESPACE::reverse_iterator<const_nonempty_iterator> const_reverse_nonempty_iterator;
  // Another special iterator: it frees memory as it iterates (used to resize)
  typedef destructive_two_d_iterator< vector< sparsegroup<value_type, GROUP_SIZE> > >
     destructive_iterator;

  // Iterator functions
  iterator begin()                      { return iterator(this, 0); }
  const_iterator begin() const          { return const_iterator(this, 0); }
  iterator end()                        { return iterator(this, size()); }
  const_iterator end() const            { return const_iterator(this, size()); }
  reverse_iterator rbegin()             { return reverse_iterator(end()); }
  const_reverse_iterator rbegin() const { return const_reverse_iterator(end()); }
  reverse_iterator rend()               { return reverse_iterator(begin()); }
  const_reverse_iterator rend() const { return const_reverse_iterator(begin()); }

  // Versions for our special non-empty iterator: a two-d iterator over
  // (group, within-group) positions that skips empty buckets.
  nonempty_iterator nonempty_begin()             {
    return nonempty_iterator(groups.begin(), groups.end(), groups.begin());
  }
  const_nonempty_iterator nonempty_begin() const {
    return const_nonempty_iterator(groups.begin(),groups.end(), groups.begin());
  }
  nonempty_iterator nonempty_end() {
    return nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  const_nonempty_iterator nonempty_end() const {
    return const_nonempty_iterator(groups.begin(), groups.end(), groups.end());
  }
  reverse_nonempty_iterator nonempty_rbegin() {
    return reverse_nonempty_iterator(nonempty_end());
  }
  const_reverse_nonempty_iterator nonempty_rbegin() const {
    return const_reverse_nonempty_iterator(nonempty_end());
  }
  reverse_nonempty_iterator nonempty_rend() {
    return reverse_nonempty_iterator(nonempty_begin());
  }
  const_reverse_nonempty_iterator nonempty_rend() const {
    return const_reverse_nonempty_iterator(nonempty_begin());
  }
  destructive_iterator destructive_begin() {
    return destructive_iterator(groups.begin(), groups.end(), groups.begin());
  }
  destructive_iterator destructive_end() {
    return destructive_iterator(groups.begin(), groups.end(), groups.end());
  }

 private:
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::reference
    GroupsReference;
  typedef typename
    vector< sparsegroup<value_type, GROUP_SIZE> >::const_reference
    GroupsConstReference;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::iterator
    GroupsIterator;
  typedef typename vector< sparsegroup<value_type, GROUP_SIZE> >::const_iterator
    GroupsConstIterator;

  // How to deal with the proper group
  static size_type num_groups(size_type num) {   // how many to hold num buckets
    return num == 0 ? 0 : ((num-1) / GROUP_SIZE) + 1;
  }

  size_type pos_in_group(size_type i) const { return i % GROUP_SIZE; }
  size_type group_num(size_type i) const    { return i / GROUP_SIZE; }
  GroupsReference which_group(size_type i)  { return groups[group_num(i)]; }
  GroupsConstReference which_group(size_type i) const
                                            { return groups[group_num(i)]; }


 public:
  // Constructors -- default, normal (when you specify size), and copy
  sparsetable(size_type size = 0)
    : groups(num_groups(size)), table_size(size), num_buckets(0) { }
  // We can get away with using the default copy constructor,
  // and default destructor, and hence the default operator=.  Huzzah!

  // Many STL algorithms use swap instead of copy constructors
  void swap(sparsetable& x) {
    STL_NAMESPACE::swap(groups, x.groups);
    STL_NAMESPACE::swap(table_size, x.table_size);
    STL_NAMESPACE::swap(num_buckets, x.num_buckets);
  }

  // It's always nice to be able to clear a table without deallocating it
  void clear() {
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group ) {
      group->clear();
    }
    num_buckets = 0;
  }

  // Functions that tell you about size.
  // NOTE: empty() is non-intuitive!  It does not tell you the number
  // of not-empty buckets (use num_nonempty() for that).  Instead
  // it says whether you've allocated any buckets or not.
  size_type size() const           { return table_size; }
  size_type max_size() const       { return size_type(-1); }
  bool empty() const               { return table_size == 0; }
  // We also may want to know how many *used* buckets there are
  size_type num_nonempty() const   { return num_buckets; }

  // OK, we'll let you resize one of these puppies
  void resize(size_type new_size) {
    groups.resize(num_groups(new_size));
    if ( new_size < table_size) {   // lower num_buckets, clear last group
      // When new_size isn't group-aligned, the (new) last group keeps
      // some buckets and loses the rest; then recount num_buckets from
      // scratch since we don't know how many non-empties were dropped.
      if ( new_size % GROUP_SIZE > 0 )       // need to clear from last group
        groups.back().erase(groups.back().begin() + (new_size % GROUP_SIZE),
                            groups.back().end());
      num_buckets = 0;                       // refigure # of used buckets
      GroupsConstIterator group;
      for ( group = groups.begin(); group != groups.end(); ++group )
        num_buckets += group->num_nonempty();
    }
    table_size = new_size;
  }


  // We let you see if a bucket is non-empty without retrieving it
  bool test(size_type i) const {
    return which_group(i).test(pos_in_group(i));
  }
  bool test(iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }
  bool test(const_iterator pos) const {
    return which_group(pos.pos).test(pos_in_group(pos.pos));
  }

  // We only return const_references because it's really hard to
  // return something settable for empty buckets.  Use set() instead.
  const_reference get(size_type i) const {
    assert(i < table_size);
    return which_group(i).get(pos_in_group(i));
  }

  // TODO(csilvers): make protected + friend element_adaptor
  reference mutating_get(size_type i) {    // fills bucket i before getting
    assert(i < table_size);
    // num_buckets bookkeeping: the group's non-empty count may grow by
    // one if bucket i was empty; add the delta to the table-wide count.
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).mutating_get(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // Syntactic sugar.  As in sparsegroup, the non-const version is harder
  const_reference operator[](size_type i) const {
    return get(i);
  }

  element_adaptor operator[](size_type i) {
    return element_adaptor(this, i);
  }

  // Needed for hashtables, gets as a nonempty_iterator.  Crashes for empty bcks
  const_nonempty_iterator get_iter(size_type i) const {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return const_nonempty_iterator(
      groups.begin(), groups.end(),
      groups.begin() + group_num(i),
      (groups[group_num(i)].nonempty_begin() +
       groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }
  // For nonempty we can return a non-const version
  nonempty_iterator get_iter(size_type i) {
    assert(test(i));    // how can a nonempty_iterator point to an empty bucket?
    return nonempty_iterator(
      groups.begin(), groups.end(),
      groups.begin() + group_num(i),
      (groups[group_num(i)].nonempty_begin() +
       groups[group_num(i)].pos_to_offset(pos_in_group(i))));
  }


  // This returns a reference to the inserted item (which is a copy of val)
  // The trick is to figure out whether we're replacing or inserting anew
  reference set(size_type i, const_reference val) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    reference retval = which_group(i).set(pos_in_group(i), val);
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
    return retval;
  }

  // This takes the specified elements out of the table.  This is
  // "undefining", rather than "clearing".
  void erase(size_type i) {
    assert(i < table_size);
    size_type old_numbuckets = which_group(i).num_nonempty();
    which_group(i).erase(pos_in_group(i));
    num_buckets += which_group(i).num_nonempty() - old_numbuckets;
  }

  void erase(iterator pos) {
    erase(pos.pos);
  }

  void erase(iterator start, iterator end) {
    // This could be more efficient, but then we'd need to figure
    // out if we spanned groups or not.  Doesn't seem worth it.
    for ( ; start != end; ++start )
      erase(start);
  }


  // We support reading and writing tables to disk.  We don't store
  // the actual array contents (which we don't know how to store),
  // just the groups and sizes.  Returns true if all went ok.

 private:
  // Every time the disk format changes, this should probably change too
  static const unsigned long MAGIC_NUMBER = 0x24687531;

  // Old versions of this code wrote all data in 32 bits.  We need to
  // support those files as well as 64-bit systems.  So we use the
  // following encoding scheme: values < 2^32-1 are stored in 4 bytes
  // in big-endian order.  Values >= 2^32-1 are stored as the marker
  // 0xFFFFFFFF followed by 8 bytes in big-endian order.  This causes
  // us to mis-read old-version files that stored exactly 0xFFFFFFFF,
  // but I don't think that is likely to have happened for these
  // particular values.  (PUT_/GET_ are byte-at-a-time I/O macros
  // defined earlier in this file.)
  static bool write_32_or_64(FILE* fp, size_type value) {
    if ( value < 0xFFFFFFFFULL ) {        // fits in 4 bytes
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    } else if ( value == 0xFFFFFFFFUL ) {   // special case in 32bit systems
      // Write the marker plus 0x00000000FFFFFFFF byte-by-byte: when
      // size_type is only 32 bits wide, the shifts of >= 32 in the
      // branch below would be undefined behavior.
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(0, 0); PUT_(0, 0); PUT_(0, 0); PUT_(0, 0);
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);
    } else {
      PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0); PUT_(0xFF, 0);  // marker
      PUT_(value, 56);
      PUT_(value, 48);
      PUT_(value, 40);
      PUT_(value, 32);
      PUT_(value, 24);
      PUT_(value, 16);
      PUT_(value, 8);
      PUT_(value, 0);
    }
    return true;
  }

  static bool read_32_or_64(FILE* fp, size_type *value) {  // reads into value
    size_type first4 = 0;
    int x;                      // scratch required by the GET_ macro
    GET_(first4, 24);
    GET_(first4, 16);
    GET_(first4, 8);
    GET_(first4, 0);
    if ( first4 < 0xFFFFFFFFULL ) {
      *value = first4;
    } else {
      // Saw the marker: the real value follows as 8 big-endian bytes.
      GET_(*value, 56);
      GET_(*value, 48);
      GET_(*value, 40);
      GET_(*value, 32);
      GET_(*value, 24);
      GET_(*value, 16);
      GET_(*value, 8);
      GET_(*value, 0);
    }
    return true;
  }

 public:
  bool write_metadata(FILE *fp) const {
    if ( !write_32_or_64(fp, MAGIC_NUMBER) )  return false;
    if ( !write_32_or_64(fp, table_size) )  return false;
    if ( !write_32_or_64(fp, num_buckets) )  return false;

    GroupsConstIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->write_metadata(fp) == false )  return false;
    return true;
  }

  // Reading destroys the old table contents!  Returns true if read ok.
  bool read_metadata(FILE *fp) {
    size_type magic_read = 0;
    if ( !read_32_or_64(fp, &magic_read) )  return false;
    if ( magic_read != MAGIC_NUMBER ) {
      clear();                        // just to be consistent
      return false;
    }

    if ( !read_32_or_64(fp, &table_size) )  return false;
    if ( !read_32_or_64(fp, &num_buckets) )  return false;

    resize(table_size);                            // so the vector's sized ok
    GroupsIterator group;
    for ( group = groups.begin(); group != groups.end(); ++group )
      if ( group->read_metadata(fp) == false )  return false;
    return true;
  }

  // This code is identical to that for SparseGroup
  // If your keys and values are simple enough, we can write them
  // to disk for you.  "simple enough" means no pointers.
  // However, we don't try to normalize endianness
  bool write_nopointer_data(FILE *fp) const {
    for ( const_nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fwrite(&*it, sizeof(*it), 1, fp) )  return false;
    }
    return true;
  }

  // When reading, we have to override the potential const-ness of *it
  bool read_nopointer_data(FILE *fp) {
    for ( nonempty_iterator it = nonempty_begin();
          it != nonempty_end(); ++it ) {
      if ( !fread(reinterpret_cast<void*>(&(*it)), sizeof(*it), 1, fp) )
        return false;
    }
    return true;
  }

  // Comparisons.  Note the comparisons are pretty arbitrary: we
  // compare values of the first index that isn't equal (using default
  // value for empty buckets).
  bool operator==(const sparsetable& x) const {
    return ( table_size == x.table_size &&
             num_buckets == x.num_buckets &&
             groups == x.groups );
  }
  bool operator<(const sparsetable& x) const {      // also from algobase.h
    return STL_NAMESPACE::lexicographical_compare(begin(), end(),
                                                  x.begin(), x.end());
  }
  bool operator!=(const sparsetable& x) const { return !(*this == x); }
  bool operator<=(const sparsetable& x) const { return *this < x || *this == x; }
  bool operator>(const sparsetable& x) const { return x <= *this; }
  bool operator>=(const sparsetable& x) const { return x < *this; }


 private:
  // The actual data
  vector< sparsegroup<value_type, GROUP_SIZE> > groups;  // our list of groups
  size_type table_size;                         // how many buckets they want
  size_type num_buckets;                        // number of non-empty buckets
};
1381
// We need a global swap as well, so STL algorithms (and std::swap
// callers) pick up the cheap member swap via argument lookup.
template <class T, uint16_t GROUP_SIZE>
inline void swap(sparsetable<T,GROUP_SIZE> &x, sparsetable<T,GROUP_SIZE> &y) {
  x.swap(y);
}
1387
1388#undef GET_
1389#undef PUT_
1390
1391_END_GOOGLE_NAMESPACE_
1392
1393#endif
Note: See TracBrowser for help on using the repository browser.