1 | /*=============================================================================
|
---|
2 | Copyright (c) 2001-2003 Joel de Guzman
|
---|
3 | http://spirit.sourceforge.net/
|
---|
4 |
|
---|
5 | Use, modification and distribution is subject to the Boost Software
|
---|
6 | License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at
|
---|
7 | http://www.boost.org/LICENSE_1_0.txt)
|
---|
8 | =============================================================================*/
|
---|
9 | #ifndef BOOST_SPIRIT_TST_IPP
|
---|
10 | #define BOOST_SPIRIT_TST_IPP
|
---|
11 |
|
---|
12 | ///////////////////////////////////////////////////////////////////////////////
|
---|
13 | #include <memory> // for std::auto_ptr
|
---|
14 | #include <boost/spirit/core/assert.hpp>
|
---|
15 |
|
---|
16 | ///////////////////////////////////////////////////////////////////////////////
|
---|
17 | namespace boost { namespace spirit {
|
---|
18 |
|
---|
19 | namespace impl
|
---|
20 | {
|
---|
21 |
|
---|
22 | ///////////////////////////////////////////////////////////////////////////////
|
---|
23 | //
|
---|
24 | // tst class
|
---|
25 | //
|
---|
26 | // Ternary Search Tree implementation. The data structure is faster than
|
---|
27 | // hashing for many typical search problems especially when the search
|
---|
28 | // interface is iterator based. Searching for a string of length k in a
|
---|
29 | // ternary search tree with n strings will require at most O(log n+k)
|
---|
30 | // character comparisons. TSTs are many times faster than hash tables
|
---|
31 | // for unsuccessful searches since mismatches are discovered earlier
|
---|
32 | // after examining only a few characters. Hash tables always examine an
|
---|
33 | // entire key when searching.
|
---|
34 | //
|
---|
35 | // For details see http://www.cs.princeton.edu/~rs/strings/.
|
---|
36 | //
|
---|
37 | // *** This is a low level class and is
|
---|
38 | // not meant for public consumption ***
|
---|
39 | //
|
---|
40 | ///////////////////////////////////////////////////////////////////////////////
|
---|
41 | template <typename T, typename CharT>
|
---|
42 | struct tst_node
|
---|
43 | {
|
---|
44 | tst_node(CharT value_)
|
---|
45 | : value(value_)
|
---|
46 | , left(0)
|
---|
47 | , right(0)
|
---|
48 | { middle.link = 0; }
|
---|
49 |
|
---|
50 | ~tst_node()
|
---|
51 | {
|
---|
52 | delete left;
|
---|
53 | delete right;
|
---|
54 | if (value)
|
---|
55 | delete middle.link;
|
---|
56 | else
|
---|
57 | delete middle.data;
|
---|
58 | }
|
---|
59 |
|
---|
60 | tst_node*
|
---|
61 | clone() const
|
---|
62 | {
|
---|
63 | std::auto_ptr<tst_node> copy(new tst_node(value));
|
---|
64 |
|
---|
65 | if (left)
|
---|
66 | copy->left = left->clone();
|
---|
67 | if (right)
|
---|
68 | copy->right = right->clone();
|
---|
69 |
|
---|
70 | if (value && middle.link)
|
---|
71 | {
|
---|
72 | copy->middle.link = middle.link->clone();
|
---|
73 | }
|
---|
74 | else
|
---|
75 | {
|
---|
76 | std::auto_ptr<T> mid_data(new T(*middle.data));
|
---|
77 | copy->middle.data = mid_data.release();
|
---|
78 | }
|
---|
79 |
|
---|
80 | return copy.release();
|
---|
81 | }
|
---|
82 |
|
---|
83 | union center {
|
---|
84 |
|
---|
85 | tst_node* link;
|
---|
86 | T* data;
|
---|
87 | };
|
---|
88 |
|
---|
89 | CharT value;
|
---|
90 | tst_node* left;
|
---|
91 | center middle;
|
---|
92 | tst_node* right;
|
---|
93 | };
|
---|
94 |
|
---|
95 | ///////////////////////////////////////////////////////////////////////////
|
---|
96 | template <typename T, typename CharT>
|
---|
97 | class tst
|
---|
98 | {
|
---|
99 | public:
|
---|
100 |
|
---|
101 | struct search_info
|
---|
102 | {
|
---|
103 | T* data;
|
---|
104 | std::size_t length;
|
---|
105 | };
|
---|
106 |
|
---|
107 | tst()
|
---|
108 | : root(0) {}
|
---|
109 |
|
---|
110 | tst(tst const& other)
|
---|
111 | : root(other.root ? other.root->clone() : 0) {}
|
---|
112 |
|
---|
113 | ~tst()
|
---|
114 | { delete root; }
|
---|
115 |
|
---|
116 | tst&
|
---|
117 | operator=(tst const& other)
|
---|
118 | {
|
---|
119 | if (this != &other)
|
---|
120 | {
|
---|
121 | node_t* new_root = other.root ? other.root->clone() : 0;
|
---|
122 | delete root;
|
---|
123 | root = new_root;
|
---|
124 | }
|
---|
125 | return *this;
|
---|
126 | }
|
---|
127 |
|
---|
128 | template <typename IteratorT>
|
---|
129 | T* add(IteratorT first, IteratorT const& last, T const& data)
|
---|
130 | {
|
---|
131 | if (first == last)
|
---|
132 | return 0;
|
---|
133 |
|
---|
134 | node_t** np = &root;
|
---|
135 | CharT ch = *first;
|
---|
136 |
|
---|
137 | BOOST_SPIRIT_ASSERT(first == last || ch != 0
|
---|
138 | && "Won't add string containing null character");
|
---|
139 |
|
---|
140 | for (;;)
|
---|
141 | {
|
---|
142 | if (*np == 0 || ch == 0)
|
---|
143 | {
|
---|
144 | node_t* right = 0;
|
---|
145 | if (np != 0)
|
---|
146 | right = *np;
|
---|
147 | *np = new node_t(ch);
|
---|
148 | if (right)
|
---|
149 | (**np).right = right;
|
---|
150 | }
|
---|
151 |
|
---|
152 | if (ch < (**np).value)
|
---|
153 | {
|
---|
154 | np = &(**np).left;
|
---|
155 | }
|
---|
156 | else
|
---|
157 | {
|
---|
158 | if (ch == (**np).value)
|
---|
159 | {
|
---|
160 | if (ch == 0)
|
---|
161 | {
|
---|
162 | if ((**np).middle.data == 0)
|
---|
163 | {
|
---|
164 | (**np).middle.data = new T(data);
|
---|
165 | return (**np).middle.data;
|
---|
166 | }
|
---|
167 | else
|
---|
168 | {
|
---|
169 | // re-addition is disallowed
|
---|
170 | return 0;
|
---|
171 | }
|
---|
172 | }
|
---|
173 | ++first;
|
---|
174 | ch = (first == last) ? CharT(0) : *first;
|
---|
175 | BOOST_SPIRIT_ASSERT(first == last || ch != 0
|
---|
176 | && "Won't add string containing null character");
|
---|
177 | np = &(**np).middle.link;
|
---|
178 | }
|
---|
179 | else
|
---|
180 | {
|
---|
181 | np = &(**np).right;
|
---|
182 | }
|
---|
183 | }
|
---|
184 | }
|
---|
185 | }
|
---|
186 |
|
---|
187 | template <typename ScannerT>
|
---|
188 | search_info find(ScannerT const& scan) const
|
---|
189 | {
|
---|
190 | search_info result = { 0, 0 };
|
---|
191 | if (scan.at_end()) {
|
---|
192 | return result;
|
---|
193 | }
|
---|
194 |
|
---|
195 | typedef typename ScannerT::iterator_t iterator_t;
|
---|
196 | node_t* np = root;
|
---|
197 | CharT ch = *scan;
|
---|
198 | iterator_t save = scan.first;
|
---|
199 | iterator_t latest = scan.first;
|
---|
200 | std::size_t latest_len = 0;
|
---|
201 |
|
---|
202 | while (np)
|
---|
203 | {
|
---|
204 |
|
---|
205 | if (ch < np->value) // => go left!
|
---|
206 | {
|
---|
207 | if (np->value == 0)
|
---|
208 | {
|
---|
209 | result.data = np->middle.data;
|
---|
210 | if (result.data)
|
---|
211 | {
|
---|
212 | latest = scan.first;
|
---|
213 | latest_len = result.length;
|
---|
214 | }
|
---|
215 | }
|
---|
216 |
|
---|
217 | np = np->left;
|
---|
218 | }
|
---|
219 | else if (ch == np->value) // => go middle!
|
---|
220 | {
|
---|
221 | // Matching the null character is not allowed.
|
---|
222 | if (np->value == 0)
|
---|
223 | {
|
---|
224 | result.data = np->middle.data;
|
---|
225 | if (result.data)
|
---|
226 | {
|
---|
227 | latest = scan.first;
|
---|
228 | latest_len = result.length;
|
---|
229 | }
|
---|
230 | break;
|
---|
231 | }
|
---|
232 |
|
---|
233 | ++scan;
|
---|
234 | ch = scan.at_end() ? CharT(0) : *scan;
|
---|
235 | np = np->middle.link;
|
---|
236 | ++result.length;
|
---|
237 | }
|
---|
238 | else // (ch > np->value) => go right!
|
---|
239 | {
|
---|
240 | if (np->value == 0)
|
---|
241 | {
|
---|
242 | result.data = np->middle.data;
|
---|
243 | if (result.data)
|
---|
244 | {
|
---|
245 | latest = scan.first;
|
---|
246 | latest_len = result.length;
|
---|
247 | }
|
---|
248 | }
|
---|
249 |
|
---|
250 | np = np->right;
|
---|
251 | }
|
---|
252 | }
|
---|
253 |
|
---|
254 | if (result.data == 0)
|
---|
255 | {
|
---|
256 | scan.first = save;
|
---|
257 | }
|
---|
258 | else
|
---|
259 | {
|
---|
260 | scan.first = latest;
|
---|
261 | result.length = latest_len;
|
---|
262 | }
|
---|
263 | return result;
|
---|
264 | }
|
---|
265 |
|
---|
266 | private:
|
---|
267 |
|
---|
268 | typedef tst_node<T, CharT> node_t;
|
---|
269 | node_t* root;
|
---|
270 | };
|
---|
271 |
|
---|
272 | ///////////////////////////////////////////////////////////////////////////////
|
---|
273 | } // namespace impl
|
---|
274 |
|
---|
275 | }} // namespace boost::spirit
|
---|
276 |
|
---|
277 | #endif
|
---|