source: NonGTP/Boost/boost/date_time/string_parse_tree.hpp @ 857

Revision 857, 8.4 KB checked in by igarcia, 18 years ago (diff)
Line 
1#ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
2#define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
3
4/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
5 * Use, modification and distribution is subject to the
6 * Boost Software License, Version 1.0. (See accompanying
7 * file LICENSE-1.0 or http://www.boost.org/LICENSE-1.0)
8 * Author: Jeff Garland, Bart Garst
9 * $Date: 2005/07/01 02:58:57 $
10 */
11
12
13#include "boost/lexical_cast.hpp" //error without?
14#include "boost/algorithm/string/case_conv.hpp"
15#include <map>
16#include <string>
17#include <vector>
18#include <algorithm>
19
20namespace boost { namespace date_time {
21
22
23template<typename charT>
24struct parse_match_result
25{
26  parse_match_result() :
27    match_depth(0),
28    current_match(-1)// -1 is match_not-found value
29  {}
30  typedef std::basic_string<charT> string_type;
31  string_type remaining() const
32  {
33    if (match_depth == cache.size()) {
34      return string_type();
35    }
36    if (current_match == -1) {
37      return cache;
38    }
39    //some of the cache was used return the rest
40    return string_type(cache, match_depth);
41  }
42  charT last_char() const
43  {
44    return cache[cache.size()-1];
45  }
46  //! Returns true if more characters were parsed than was necessary
47  /*! Should be used in conjunction with last_char()
48   * to get the remaining character. */
49  bool has_remaining() const
50  {
51    return (cache.size() > match_depth);
52  }
53 
54  // cache will hold characters that have been read from the stream
55  string_type cache;
56  unsigned short match_depth;
57  short current_match;
58  static const short PARSE_ERROR;
59};
60template<typename charT>
61const short parse_match_result<charT>::PARSE_ERROR = -1;
62
63  //for debug -- really only char streams...
64template<typename charT>
65std::basic_ostream<charT>&
66operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
67{
68  os << "cm: " << mr.current_match
69     << " C: '" << mr.cache
70     << "' md: " << mr.match_depth
71     << " R: " << mr.remaining();
72  return os;
73}
74
75
76
77//! Recursive data structure to allow efficient parsing of various strings
78/*! This class provides a quick lookup by building what amounts to a
79 *  tree data structure.  It also features a match function which can
80 *  can handle nasty input interators by caching values as it recurses
81 *  the tree so that it can backtrack as needed.
82 */
83template<typename charT>
84struct string_parse_tree
85{
86  typedef std::multimap<charT, string_parse_tree> ptree_coll;
87  typedef typename ptree_coll::value_type value_type;
88  typedef typename ptree_coll::iterator iterator;
89  typedef typename ptree_coll::const_iterator const_iterator;
90  typedef std::basic_string<charT> string_type;
91  typedef std::vector<std::basic_string<charT> > collection_type;
92  typedef parse_match_result<charT> parse_match_result_type;
93 
94  /*! Parameter "starting_point" desingates where the numbering begins.
95   * A starting_point of zero will start the numbering at zero
96   * (Sun=0, Mon=1, ...) were a starting_point of one starts the
97   * numbering at one (Jan=1, Feb=2, ...). The default is zero,
98   * negative vaules are not allowed */
99  string_parse_tree(collection_type names, unsigned int starting_point=0)
100  {
101    // iterate thru all the elements and build the tree
102    unsigned short index = 0;
103    while (index != names.size() ) {
104      string_type s = boost::algorithm::to_lower_copy(names[index]);
105      insert(s, static_cast<unsigned short>(index + starting_point));
106      index++;
107    }
108    //set the last tree node = index+1  indicating a value
109    index++;
110  }
111
112
113  string_parse_tree(short value = -1) :
114    m_value(value)
115  {}
116  ptree_coll m_next_chars;
117  short m_value;
118
119  void insert(const string_type& s, unsigned short value)
120  {
121    unsigned int i = 0;
122    iterator ti;
123    while(i < s.size()) {
124      if (i==0) {
125        if (i == (s.size()-1)) {
126          ti = m_next_chars.insert(value_type(s[i],
127                                              string_parse_tree<charT>(value)));
128        }
129        else {
130          ti = m_next_chars.insert(value_type(s[i],
131                                              string_parse_tree<charT>()));
132        }
133      }
134      else {
135        if (i == (s.size()-1)) {
136          ti = ti->second.m_next_chars.insert(value_type(s[i],
137                                                         string_parse_tree<charT>(value)));
138        }
139       
140        else {
141          ti = ti->second.m_next_chars.insert(value_type(s[i],
142                                                         string_parse_tree<charT>()));
143        }
144     
145      }
146      i++;
147    }
148  }
149 
150 
151  //! Recursive function that finds a matching string in the tree.
152  /*! Must check match_results::has_remaining() after match() is
153   * called. This is required so the user can determine if
154   * stream iterator is already pointing to the expected
155   * character or not (match() might advance sitr to next char in stream).
156   *
157   * A parse_match_result that has been returned from a failed match
158   * attempt can be sent in to the match function of a different
159   * string_parse_tree to attempt a match there. Use the iterators
160   * for the partially consumed stream, the parse_match_result object,
161   * and '0' for the level parameter. */
162  short
163  match(std::istreambuf_iterator<charT>& sitr,
164        std::istreambuf_iterator<charT>& stream_end,
165        parse_match_result_type& result,
166        unsigned int& level)  const
167  {
168
169    level++;
170    charT c;
171    // if we conditionally advance sitr, we won't have
172    // to consume the next character past the input
173    bool adv_itr = true;
174    if (level > result.cache.size()) {
175      if (sitr == stream_end) return 0; //bail - input exhausted
176      c = static_cast<charT>(std::tolower(*sitr));
177      //result.cache += c;
178      //sitr++;
179    }
180    else {
181      // if we're looking for characters from the cache,
182      // we don't want to increment sitr
183      adv_itr = false;
184      c = static_cast<charT>(std::tolower(result.cache[level-1]));
185    }
186    const_iterator litr = m_next_chars.lower_bound(c);
187    const_iterator uitr = m_next_chars.upper_bound(c);
188    while (litr != uitr) { // equal if not found
189      if(adv_itr) {
190        sitr++;
191        result.cache += c;
192      }
193      if (litr->second.m_value != -1) { // -1 is default value
194        if (result.match_depth < level) {
195          result.current_match = litr->second.m_value;
196          result.match_depth = static_cast<unsigned short>(level);
197        }
198        litr->second.match(sitr, stream_end,
199                           result, level);
200        level--;
201      }
202      else {
203        litr->second.match(sitr, stream_end,
204                           result, level);
205        level--;
206      }
207   
208      if(level <= result.cache.size()) {
209        adv_itr = false;
210      }
211
212      litr++;
213    }
214    return result.current_match;
215
216  }
217
218  /*! Must check match_results::has_remaining() after match() is
219   * called. This is required so the user can determine if
220   * stream iterator is already pointing to the expected
221   * character or not (match() might advance sitr to next char in stream).
222   */
223  parse_match_result_type
224  match(std::istreambuf_iterator<charT>& sitr,
225        std::istreambuf_iterator<charT>& stream_end) const
226  {
227    // lookup to_lower of char in tree.
228    unsigned int level = 0;
229    //    string_type cache;
230    parse_match_result_type result;
231    match(sitr, stream_end, result, level);
232    return result;
233  }
234
235  void printme(std::ostream& os, int& level)
236  {
237    level++;
238    iterator itr = m_next_chars.begin();
239    iterator end = m_next_chars.end();
240    //    os << "starting level: " << level << std::endl;
241    while (itr != end) {
242      os << "level:  " << level
243         << " node:  " << itr->first
244         << " value: " << itr->second.m_value
245         << std::endl;
246      itr->second.printme(os, level);
247      itr++;
248    }
249    level--;
250  }
251
252  void print(std::ostream& os)
253  {
254    int level = 0;
255    printme(os, level);
256  }
257   
258  void printmatch(std::ostream& os, charT c)
259  {
260    iterator litr = m_next_chars.lower_bound(c);
261    iterator uitr = m_next_chars.upper_bound(c);
262    os << "matches for: " << c << std::endl;
263    while (litr != uitr) {
264      os << " node:  " << litr->first
265         << " value: " << litr->second.m_value
266         << std::endl;
267      litr++;
268    }
269  }
270
271};
272
273
274} } //namespace
275#endif
Note: See TracBrowser for help on using the repository browser.