source: NonGTP/Boost/boost/date_time/format_date_parser.hpp @ 857

Revision 857, 22.7 KB checked in by igarcia, 18 years ago (diff)
Line 
1
2#ifndef DATE_TIME_FORMAT_DATE_PARSER_HPP__
3#define DATE_TIME_FORMAT_DATE_PARSER_HPP__
4
5/* Copyright (c) 2004-2005 CrystalClear Software, Inc.
6 * Use, modification and distribution is subject to the
7 * Boost Software License, Version 1.0. (See accompanying
8 * file LICENSE-1.0 or http://www.boost.org/LICENSE-1.0)
9 * Author: Jeff Garland, Bart Garst
10 * $Date: 2005/06/16 13:22:27 $
11 */
12
13
14#include "boost/lexical_cast.hpp"
15#include "boost/date_time/string_parse_tree.hpp"
16#include "boost/date_time/strings_from_facet.hpp"
17#include "boost/date_time/special_values_parser.hpp"
18#include <string>
19#include <vector>
20
21namespace boost { namespace date_time {
22 
23
24//! Helper function for parsing fixed length strings into integers
25/*! Will consume 'length' number of characters from stream. Consumed
26 * character are transfered to parse_match_result struct.
27 * Returns '-1' if no number can be parsed or incorrect number of
28 * digits in stream. */
29template<typename int_type, typename charT>
30inline
31int_type
32fixed_string_to_int(std::istreambuf_iterator<charT>& itr,
33                    std::istreambuf_iterator<charT>& stream_end,
34                    parse_match_result<charT>& mr,
35                    unsigned int length)
36{
37  //typedef std::basic_string<charT>  string_type;
38  unsigned int j = 0;
39  //string_type s;
40  while (j < length && itr != stream_end && std::isdigit(*itr)) {
41    mr.cache += (*itr);
42    itr++;
43    j++;
44  }
45  int_type i = -1;
46  // mr.cache will hold leading zeros. size() tells us when input is too short.
47  if(mr.cache.size() < length) {
48    return i;
49  }
50  try {
51    i = boost::lexical_cast<int_type>(mr.cache);
52  }catch(bad_lexical_cast blc){
53    // we want to return -1 if the cast fails so nothing to do here
54  }
55  return i;
56}
57
58//! Helper function for parsing varied length strings into integers
59/*! Will consume 'max_length' characters from stream only if those
60 * characters are digits. Returns '-1' if no number can be parsed.
61 * Will not parse a number preceeded by a '+' or '-'. */
62template<typename int_type, typename charT>
63inline
64int_type
65var_string_to_int(std::istreambuf_iterator<charT>& itr,
66                  std::istreambuf_iterator<charT>& stream_end,
67                  unsigned int max_length)
68{
69  typedef std::basic_string<charT>  string_type;
70  unsigned int j = 0;
71  string_type s;
72  while ((j < max_length) && std::isdigit(*itr)) {
73    s += (*itr);
74    itr++;
75    j++;
76  }
77  int_type i = -1;
78  if(s.length() != 0) {
79    i = boost::lexical_cast<int_type>(s);
80  }
81  return i;
82}
83
84
85//! Class with generic date parsing using a format string
86/*! The following is the set of recognized format specifiers
87 -  %a - Short weekday name
88 -  %A - Long weekday name
89 -  %b - Abbreviated month name
90 -  %B - Full month name
91 -  %d - Day of the month as decimal 01 to 31
92 -  %j - Day of year as decimal from 001 to 366
93 -  %m - Month name as a decimal 01 to 12
94 -  %U - Week number 00 to 53 with first Sunday as the first day of week 1?
95 -  %w - Weekday as decimal number 0 to 6 where Sunday == 0
96 -  %W - Week number 00 to 53 where Monday is first day of week 1
97 -  %x - facet default date representation
98 -  %y - Year without the century - eg: 04 for 2004
99 -  %Y - Year with century
100
101 The weekday specifiers (%a and %A) do not add to the date construction,
102 but they provide a way to skip over the weekday names for formats that
103 provide them.
104
105 todo -- Another interesting feature that this approach could provide is
106         an option to fill in any missing fields with the current values
107         from the clock.  So if you have %m-%d the parser would detect
108         the missing year value and fill it in using the clock.
109
110 todo -- What to do with the %x.  %x in the classic facet is just bad...
111
112 */
113template<class date_type, typename charT>
114class format_date_parser
115{
116 public:
117  typedef std::basic_string<charT>        string_type;
118  typedef std::basic_stringstream<charT>  stringstream_type;
119  typedef std::istreambuf_iterator<charT> stream_itr_type;
120  typedef typename string_type::const_iterator const_itr;
121  typedef typename date_type::year_type  year_type;
122  typedef typename date_type::month_type month_type;
123  typedef typename date_type::day_type day_type;
124  typedef typename date_type::duration_type duration_type;
125  typedef typename date_type::day_of_week_type day_of_week_type;
126  typedef typename date_type::day_of_year_type day_of_year_type;
127  typedef string_parse_tree<charT> parse_tree_type;
128  typedef typename parse_tree_type::parse_match_result_type match_results;
129  typedef std::vector<std::basic_string<charT> > input_collection_type;
130
131  // TODO sv_parser uses its default constructor - write the others
132 
133  format_date_parser(const string_type& format,
134                     const input_collection_type& month_short_names,
135                     const input_collection_type& month_long_names,
136                     const input_collection_type& weekday_short_names,
137                     const input_collection_type& weekday_long_names) :
138    m_format(format),
139    m_month_short_names(month_short_names, 1),
140    m_month_long_names(month_long_names, 1),
141    m_weekday_short_names(weekday_short_names),
142    m_weekday_long_names(weekday_long_names)
143  {}
144 
145  format_date_parser(const string_type& format,
146                     const std::locale& locale) :
147    m_format(format),
148    m_month_short_names(gather_month_strings<charT>(locale), 1),
149    m_month_long_names(gather_month_strings<charT>(locale, false), 1),
150    m_weekday_short_names(gather_weekday_strings<charT>(locale)),
151    m_weekday_long_names(gather_weekday_strings<charT>(locale, false))
152  {}
153
154  format_date_parser(const format_date_parser<date_type,charT>& fdp)
155  {
156    this->m_format = fdp.m_format;
157    this->m_month_short_names = fdp.m_month_short_names;
158    this->m_month_long_names = fdp.m_month_long_names;
159    this->m_weekday_short_names = fdp.m_weekday_short_names;
160    this->m_weekday_long_names = fdp.m_weekday_long_names;
161  }
162 
163  string_type format() const
164  {
165    return m_format;
166  }
167
168  void format(string_type format)
169  {
170    m_format = format;
171  }
172
173  void short_month_names(const input_collection_type& month_names)
174  {
175    m_month_short_names = parse_tree_type(month_names, 1);
176  }
177  void long_month_names(const input_collection_type& month_names)
178  {
179    m_month_long_names = parse_tree_type(month_names, 1);
180  }
181  void short_weekday_names(const input_collection_type& weekday_names)
182  {
183    m_weekday_short_names = parse_tree_type(weekday_names);
184  }
185  void long_weekday_names(const input_collection_type& weekday_names)
186  {
187    m_weekday_long_names = parse_tree_type(weekday_names);
188  }
189
190  date_type
191  parse_date(const string_type& value,
192             const string_type& format,
193             const special_values_parser<date_type,charT>& sv_parser) const
194  {
195    stringstream_type ss;
196    ss << value;
197    stream_itr_type sitr(ss);
198    stream_itr_type stream_end;
199    return parse_date(sitr, stream_end, format, sv_parser);
200  }
201
202  date_type
203  parse_date(std::istreambuf_iterator<charT>& sitr,
204             std::istreambuf_iterator<charT>& stream_end,
205             const special_values_parser<date_type,charT>& sv_parser) const
206  {
207    return parse_date(sitr, stream_end, m_format, sv_parser);
208  }
209
210  /*! Of all the objects that the format_date_parser can parse, only a
211   * date can be a special value. Therefore, only parse_date checks
212   * for special_values. */
213  date_type
214  parse_date(std::istreambuf_iterator<charT>& sitr,
215             std::istreambuf_iterator<charT>& stream_end,
216             string_type format,
217             const special_values_parser<date_type,charT>& sv_parser) const
218  {
219    bool use_current_char = false;
220   
221    // skip leading whitespace
222    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
223    charT current_char = *sitr;
224
225    short year(0), month(0), day(0), day_of_year(0);// wkday(0);
226    /* Initialized the following to their minimum values. These intermediate
227     * objects are used so we get specific exceptions when part of the input
228     * is unparsable.
229     * Ex: "205-Jan-15" will throw a bad_year, "2005-Jsn-15"- bad_month, etc.*/
230    year_type t_year(1400);
231    month_type t_month(1);
232    day_type t_day(1);
233    day_of_week_type wkday(0);
234   
235   
236    const_itr itr(format.begin());
237    while (itr != format.end() && (sitr != stream_end)) {
238      if (*itr == '%') {
239        itr++;
240        if (*itr != '%') {
241          switch(*itr) {
242          case 'a':
243            {
244              //this value is just throw away.  It could be used for
245              //error checking potentially, but it isn't helpful in
246              //actually constructing the date - we just need to get it
247              //out of the stream
248              match_results mr = m_weekday_short_names.match(sitr, stream_end);
249              if(mr.current_match == match_results::PARSE_ERROR) {
250                // check special_values
251                if(sv_parser.match(sitr, stream_end, mr)) {
252                  return date_type(static_cast<special_values>(mr.current_match));
253                }
254              }
255              wkday = mr.current_match;
256              if (mr.has_remaining()) {
257                current_char = mr.last_char();
258                use_current_char = true;
259              }
260              break;
261            }
262          case 'A':
263            {
264              //this value is just throw away.  It could be used for
265              //error checking potentially, but it isn't helpful in
266              //actually constructing the date - we just need to get it
267              //out of the stream
268              match_results mr = m_weekday_long_names.match(sitr, stream_end);
269              if(mr.current_match == match_results::PARSE_ERROR) {
270                // check special_values
271                if(sv_parser.match(sitr, stream_end, mr)) {
272                  return date_type(static_cast<special_values>(mr.current_match));
273                }
274              }
275              wkday = mr.current_match;
276              if (mr.has_remaining()) {
277                current_char = mr.last_char();
278                use_current_char = true;
279              }
280              break;
281            }
282          case 'b':
283            {
284              match_results mr = m_month_short_names.match(sitr, stream_end);
285              if(mr.current_match == match_results::PARSE_ERROR) {
286                // check special_values
287                if(sv_parser.match(sitr, stream_end, mr)) {
288                  return date_type(static_cast<special_values>(mr.current_match));
289                }
290              }
291              t_month = month_type(mr.current_match);
292              if (mr.has_remaining()) {
293                current_char = mr.last_char();
294                use_current_char = true;
295              }
296              break;
297            }
298          case 'B':
299            {
300              match_results mr = m_month_long_names.match(sitr, stream_end);
301              if(mr.current_match == match_results::PARSE_ERROR) {
302                // check special_values
303                if(sv_parser.match(sitr, stream_end, mr)) {
304                  return date_type(static_cast<special_values>(mr.current_match));
305                }
306              }
307              t_month = month_type(mr.current_match);
308              if (mr.has_remaining()) {
309                current_char = mr.last_char();
310                use_current_char = true;
311              }
312              break;
313            }
314          case 'd':
315            {
316              match_results mr;
317              day = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
318              if(day == -1) {
319                if(sv_parser.match(sitr, stream_end, mr)) {
320                  return date_type(static_cast<special_values>(mr.current_match));
321                }
322              }
323              t_day = day_type(day);
324              break;
325            }
326          case 'j':
327            {
328              match_results mr;
329              day_of_year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 3);
330              if(day_of_year == -1) {
331                if(sv_parser.match(sitr, stream_end, mr)) {
332                  return date_type(static_cast<special_values>(mr.current_match));
333                }
334              }
335              // these next two lines are so we get an exception with bad input
336              day_of_year_type t_day_of_year(1);
337              t_day_of_year = day_of_year_type(day_of_year);
338              break;
339            }
340          case 'm':
341            {
342              match_results mr;
343              month = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
344              if(month == -1) {
345                if(sv_parser.match(sitr, stream_end, mr)) {
346                  return date_type(static_cast<special_values>(mr.current_match));
347                }
348              }
349              t_month = month_type(month);
350              break;
351            }
352          case 'Y':
353            {
354              match_results mr;
355              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
356              if(year == -1) {
357                if(sv_parser.match(sitr, stream_end, mr)) {
358                  return date_type(static_cast<special_values>(mr.current_match));
359                }
360              }
361              t_year = year_type(year);
362              break;
363            }
364          case 'y':
365            {
366              match_results mr;
367              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
368              if(year == -1) {
369                match_results mr;
370                if(sv_parser.match(sitr, stream_end, mr)) {
371                  return date_type(static_cast<special_values>(mr.current_match));
372                }
373              }
374              year += 2000; //make 2 digit years in this century
375              t_year = year_type(year);
376              break;
377            }
378          default:
379            {} //ignore those we don't understand
380           
381          }//switch
382         
383        }
384        else { // itr == '%', second consecutive
385          sitr++;
386        }
387       
388        itr++; //advance past format specifier
389      }
390      else {  //skip past chars in format and in buffer
391        itr++;
392        if (use_current_char) {
393          use_current_char = false;
394          current_char = *sitr;
395        }
396        else {
397          sitr++;
398        }
399      }
400    }
401   
402    if (day_of_year > 0) {
403      date_type d(static_cast<unsigned short>(year-1),12,31); //end of prior year
404      return d + duration_type(day_of_year);
405    }
406   
407    return date_type(t_year, t_month, t_day); // exceptions were thrown earlier
408                                        // if input was no good
409  }
410 
411  //! Throws bad_month if unable to parse
412  month_type
413  parse_month(std::istreambuf_iterator<charT>& sitr,
414             std::istreambuf_iterator<charT>& stream_end,
415             string_type format) const
416  {
417    match_results mr;
418    return parse_month(sitr, stream_end, format, mr);
419  }
420 
421  //! Throws bad_month if unable to parse
422  month_type
423  parse_month(std::istreambuf_iterator<charT>& sitr,
424             std::istreambuf_iterator<charT>& stream_end,
425             string_type format,
426             match_results& mr) const
427  {
428    bool use_current_char = false;
429   
430    // skip leading whitespace
431    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
432    charT current_char = *sitr;
433
434    short month(0);
435   
436    const_itr itr(format.begin());
437    while (itr != format.end() && (sitr != stream_end)) {
438      if (*itr == '%') {
439        itr++;
440        if (*itr != '%') {
441          switch(*itr) {
442          case 'b':
443            {
444              mr = m_month_short_names.match(sitr, stream_end);
445              month = mr.current_match;
446              if (mr.has_remaining()) {
447                current_char = mr.last_char();
448                use_current_char = true;
449              }
450              break;
451            }
452          case 'B':
453            {
454              mr = m_month_long_names.match(sitr, stream_end);
455              month = mr.current_match;
456              if (mr.has_remaining()) {
457                current_char = mr.last_char();
458                use_current_char = true;
459              }
460              break;
461            }
462          case 'm':
463            {
464              month = var_string_to_int<short, charT>(sitr, stream_end, 2);
465              // var_string_to_int returns -1 if parse failed. That will
466              // cause a bad_month exception to be thrown so we do nothing here
467              break;
468            }
469          default:
470            {} //ignore those we don't understand
471           
472          }//switch
473         
474        }
475        else { // itr == '%', second consecutive
476          sitr++;
477        }
478       
479        itr++; //advance past format specifier
480      }
481      else {  //skip past chars in format and in buffer
482        itr++;
483        if (use_current_char) {
484          use_current_char = false;
485          current_char = *sitr;
486        }
487        else {
488          sitr++;
489        }
490      }
491    }
492   
493    return month_type(month); // throws bad_month exception when values are zero
494  }
495
496  //! throws bad_day_of_month if unable to parse
497  day_type
498  parse_day_of_month(std::istreambuf_iterator<charT>& sitr,
499                     std::istreambuf_iterator<charT>& stream_end) const
500  {
501    // skip leading whitespace
502    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
503
504    return day_type(var_string_to_int<short, charT>(sitr, stream_end, 2));
505  }
506
507  day_of_week_type
508  parse_weekday(std::istreambuf_iterator<charT>& sitr,
509             std::istreambuf_iterator<charT>& stream_end,
510             string_type format) const
511  {
512    match_results mr;
513    return parse_weekday(sitr, stream_end, format, mr);
514  }
515  day_of_week_type
516  parse_weekday(std::istreambuf_iterator<charT>& sitr,
517             std::istreambuf_iterator<charT>& stream_end,
518             string_type format,
519             match_results& mr) const
520  {
521    bool use_current_char = false;
522   
523    // skip leading whitespace
524    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
525    charT current_char = *sitr;
526
527    short wkday(0);
528   
529    const_itr itr(format.begin());
530    while (itr != format.end() && (sitr != stream_end)) {
531      if (*itr == '%') {
532        itr++;
533        if (*itr != '%') {
534          switch(*itr) {
535          case 'a':
536            {
537              //this value is just throw away.  It could be used for
538              //error checking potentially, but it isn't helpful in
539              //actually constructing the date - we just need to get it
540              //out of the stream
541              mr = m_weekday_short_names.match(sitr, stream_end);
542              wkday = mr.current_match;
543              if (mr.has_remaining()) {
544                current_char = mr.last_char();
545                use_current_char = true;
546              }
547              break;
548            }
549          case 'A':
550            {
551              //this value is just throw away.  It could be used for
552              //error checking potentially, but it isn't helpful in
553              //actually constructing the date - we just need to get it
554              //out of the stream
555              mr = m_weekday_long_names.match(sitr, stream_end);
556              wkday = mr.current_match;
557              if (mr.has_remaining()) {
558                current_char = mr.last_char();
559                use_current_char = true;
560              }
561              break;
562            }
563          case 'w':
564            {
565              // weekday as number 0-6, Sunday == 0
566              wkday = var_string_to_int<short, charT>(sitr, stream_end, 2);
567              break;
568            }
569          default:
570            {} //ignore those we don't understand
571           
572          }//switch
573         
574        }
575        else { // itr == '%', second consecutive
576          sitr++;
577        }
578       
579        itr++; //advance past format specifier
580      }
581      else {  //skip past chars in format and in buffer
582        itr++;
583        if (use_current_char) {
584          use_current_char = false;
585          current_char = *sitr;
586        }
587        else {
588          sitr++;
589        }
590      }
591    }
592   
593    return day_of_week_type(wkday); // throws bad_day_of_month exception
594                                    // when values are zero
595  }
596 
597  //! throws bad_year if unable to parse
598  year_type
599  parse_year(std::istreambuf_iterator<charT>& sitr,
600             std::istreambuf_iterator<charT>& stream_end,
601             string_type format) const
602  {
603    match_results mr;
604    return parse_year(sitr, stream_end, format, mr);
605  }
606
607  //! throws bad_year if unable to parse
608  year_type
609  parse_year(std::istreambuf_iterator<charT>& sitr,
610             std::istreambuf_iterator<charT>& stream_end,
611             string_type format,
612             match_results& mr) const
613  {
614    bool use_current_char = false;
615   
616    // skip leading whitespace
617    while(std::isspace(*sitr) && sitr != stream_end) { ++sitr; }
618    charT current_char = *sitr;
619
620    unsigned short year(0);
621   
622    const_itr itr(format.begin());
623    while (itr != format.end() && (sitr != stream_end)) {
624      if (*itr == '%') {
625        itr++;
626        if (*itr != '%') {
627          //match_results mr;
628          switch(*itr) {
629          case 'Y':
630            {
631              // year from 4 digit string
632              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 4);
633              break;
634            }
635          case 'y':
636            {
637              // year from 2 digit string (no century)
638              year = fixed_string_to_int<short, charT>(sitr, stream_end, mr, 2);
639              year += 2000; //make 2 digit years in this century
640              break;
641            }
642          default:
643            {} //ignore those we don't understand
644           
645          }//switch
646         
647        }
648        else { // itr == '%', second consecutive
649          sitr++;
650        }
651       
652        itr++; //advance past format specifier
653      }
654      else {  //skip past chars in format and in buffer
655        itr++;
656        if (use_current_char) {
657          use_current_char = false;
658          current_char = *sitr;
659        }
660        else {
661          sitr++;
662        }
663      }
664    }
665   
666    return year_type(year); // throws bad_year exception when values are zero
667  }
668 
669 
670 private:
671  string_type m_format;
672  parse_tree_type m_month_short_names;
673  parse_tree_type m_month_long_names;
674  parse_tree_type m_weekday_short_names;
675  parse_tree_type m_weekday_long_names;
676
677};
678
679} } //namespace
680
681#endif
682
Note: See TracBrowser for help on using the repository browser.