source: NonGTP/Boost/boost/regex/v4/perl_matcher.hpp @ 857

Revision 857, 15.5 KB checked in by igarcia, 18 years ago (diff)
Line 
1/*
2 *
3 * Copyright (c) 2002
4 * John Maddock
5 *
6 * Use, modification and distribution are subject to the
7 * Boost Software License, Version 1.0. (See accompanying file
8 * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
9 *
10 */
11
12#ifndef BOOST_REGEX_MATCHER_HPP
13#define BOOST_REGEX_MATCHER_HPP
14
15#include <boost/regex/v4/iterator_category.hpp>
16
17#ifdef BOOST_HAS_ABI_HEADERS
18#  include BOOST_ABI_PREFIX
19#endif
20
21namespace boost{
22namespace re_detail{
23
24//
25// error checking API:
26//
27BOOST_REGEX_DECL void BOOST_REGEX_CALL verify_options(boost::regex_constants::syntax_option_type ef, match_flag_type mf);
28//
29// function can_start:
30//
31template <class charT>
32bool can_start(charT c, const unsigned char* map, unsigned char mask)
33{
34   return ((c < static_cast<charT>(0)) ? true : ((c >= static_cast<charT>(1 << CHAR_BIT)) ? true : map[c] & mask));
35}
36inline bool can_start(char c, const unsigned char* map, unsigned char mask)
37{
38   return map[(unsigned char)c] & mask;
39}
40inline bool can_start(signed char c, const unsigned char* map, unsigned char mask)
41{
42   return map[(unsigned char)c] & mask;
43}
44inline bool can_start(unsigned char c, const unsigned char* map, unsigned char mask)
45{
46   return map[c] & mask;
47}
48inline bool can_start(unsigned short c, const unsigned char* map, unsigned char mask)
49{
50   return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask);
51}
52#if !defined(__HP_aCC)
53#if defined(WCHAR_MIN) && (WCHAR_MIN == 0) && !defined(BOOST_NO_INTRINSIC_WCHAR_T)
54inline bool can_start(wchar_t c, const unsigned char* map, unsigned char mask)
55{
56   return ((c >= (1 << CHAR_BIT)) ? true : map[c] & mask);
57}
58#endif
59#endif
60
61
62//
63// Unfortunately Rogue Waves standard library appears to have a bug
64// in std::basic_string::compare that results in eroneous answers
65// in some cases (tested with Borland C++ 5.1, Rogue Wave lib version
66// 0x020101) the test case was:
67// {39135,0} < {0xff,0}
68// which succeeds when it should not.
69//
70#ifndef _RWSTD_VER
71#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310)
72template <class C, class T, class A>
73inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
74{
75   if(0 == *p)
76   {
77      if(s.empty() || ((s.size() == 1) && (s[0] == 0)))
78         return 0;
79   }
80   return s.compare(p);
81}
82#endif
83#else
84#if !BOOST_WORKAROUND(BOOST_MSVC, < 1310)
85template <class C, class T, class A>
86inline int string_compare(const std::basic_string<C,T,A>& s, const C* p)
87{
88   if(0 == *p)
89   {
90      if(s.empty() || ((s.size() == 1) && (s[0] == 0)))
91         return 0;
92   }
93   return s.compare(p);
94}
95#endif
96inline int string_compare(const std::string& s, const char* p)
97{ return std::strcmp(s.c_str(), p); }
98# ifndef BOOST_NO_WREGEX
99inline int string_compare(const std::wstring& s, const wchar_t* p)
100{ return std::wcscmp(s.c_str(), p); }
101#endif
102#endif
103template <class Seq, class C>
104inline int string_compare(const Seq& s, const C* p)
105{
106   std::size_t i = 0;
107   while((i < s.size()) && (p[i] == s[i]))
108   {
109      ++i;
110   }
111   return (i == s.size()) ? -p[i] : s[i] - p[i];
112}
113# define STR_COMP(s,p) string_compare(s,p)
114
115template<class charT>
116inline const charT* re_skip_past_null(const charT* p)
117{
118  while (*p != static_cast<charT>(0)) ++p;
119  return ++p;
120}
121
122template <class iterator, class charT, class traits_type, class char_classT>
123iterator BOOST_REGEX_CALL re_is_set_member(iterator next,
124                          iterator last,
125                          const re_set_long<char_classT>* set_,
126                          const regex_data<charT, traits_type>& e, bool icase)
127{   
128   const charT* p = reinterpret_cast<const charT*>(set_+1);
129   iterator ptr;
130   unsigned int i;
131   //bool icase = e.m_flags & regex_constants::icase;
132
133   if(next == last) return next;
134
135   typedef typename traits_type::string_type traits_string_type;
136   const ::boost::regex_traits_wrapper<traits_type>& traits_inst = *(e.m_ptraits);
137   
138   // dwa 9/13/00 suppress incorrect MSVC warning - it claims this is never
139   // referenced
140   (void)traits_inst;
141
142   // try and match a single character, could be a multi-character
143   // collating element...
144   for(i = 0; i < set_->csingles; ++i)
145   {
146      ptr = next;
147      if(*p == static_cast<charT>(0))
148      {
149         // treat null string as special case:
150         if(traits_inst.translate(*ptr, icase) != *p)
151         {
152            while(*p == static_cast<charT>(0))++p;
153            continue;
154         }
155         return set_->isnot ? next : (ptr == next) ? ++next : ptr;
156      }
157      else
158      {
159         while(*p && (ptr != last))
160         {
161            if(traits_inst.translate(*ptr, icase) != *p)
162               break;
163            ++p;
164            ++ptr;
165         }
166
167         if(*p == static_cast<charT>(0)) // if null we've matched
168            return set_->isnot ? next : (ptr == next) ? ++next : ptr;
169
170         p = re_skip_past_null(p);     // skip null
171      }
172   }
173
174   charT col = traits_inst.translate(*next, icase);
175
176
177   if(set_->cranges || set_->cequivalents)
178   {
179      traits_string_type s1;
180      //
181      // try and match a range, NB only a single character can match
182      if(set_->cranges)
183      {
184         if((e.m_flags & regex_constants::collate) == 0)
185            s1.assign(1, col);
186         else
187         {
188            charT a[2] = { col, charT(0), };
189            s1 = traits_inst.transform(a, a + 1);
190         }
191         for(i = 0; i < set_->cranges; ++i)
192         {
193            if(STR_COMP(s1, p) >= 0)
194            {
195               do{ ++p; }while(*p);
196               ++p;
197               if(STR_COMP(s1, p) <= 0)
198                  return set_->isnot ? next : ++next;
199            }
200            else
201            {
202               // skip first string
203               do{ ++p; }while(*p);
204               ++p;
205            }
206            // skip second string
207            do{ ++p; }while(*p);
208            ++p;
209         }
210      }
211      //
212      // try and match an equivalence class, NB only a single character can match
213      if(set_->cequivalents)
214      {
215         charT a[2] = { col, charT(0), };
216         s1 = traits_inst.transform_primary(a, a +1);
217         for(i = 0; i < set_->cequivalents; ++i)
218         {
219            if(STR_COMP(s1, p) == 0)
220               return set_->isnot ? next : ++next;
221            // skip string
222            do{ ++p; }while(*p);
223            ++p;
224         }
225      }
226   }
227   if(traits_inst.isctype(col, set_->cclasses) == true)
228      return set_->isnot ? next : ++next;
229   if((set_->cnclasses != 0) && (traits_inst.isctype(col, set_->cnclasses) == false))
230      return set_->isnot ? next : ++next;
231   return set_->isnot ? ++next : next;
232}
233
234template <class BidiIterator>
235class repeater_count
236{
237   repeater_count** stack;
238   repeater_count* next;
239   int id;
240   std::size_t count;        // the number of iterations so far
241   BidiIterator start_pos;   // where the last repeat started
242public:
243   repeater_count(repeater_count** s)
244   {
245      stack = s;
246      next = 0;
247      id = -1;
248      count = 0;
249   }
250   repeater_count(int i, repeater_count** s, BidiIterator start)
251      : start_pos(start)
252   {
253      id = i;
254      stack = s;
255      next = *stack;
256      *stack = this;
257      if(id > next->id)
258         count = 0;
259      else
260      {
261         repeater_count* p = next;
262         while(p->id != id)
263            p = p->next;
264         count = p->count;
265         start_pos = p->start_pos;
266      }
267   }
268   ~repeater_count()
269   {
270      *stack = next;
271   }
272   std::size_t get_count() { return count; }
273   int get_id() { return id; }
274   std::size_t operator++() { return ++count; }
275   bool check_null_repeat(const BidiIterator& pos, std::size_t max)
276   {
277      // this is called when we are about to start a new repeat,
278      // if the last one was NULL move our count to max,
279      // otherwise save the current position.
280      bool result = (count == 0) ? false : (pos == start_pos);
281      if(result)
282         count = max;
283      else
284         start_pos = pos;
285      return result;
286   }
287};
288
289struct saved_state;
290
291enum saved_state_type
292{
293   saved_type_end = 0,
294   saved_type_paren = 1,
295   saved_type_recurse = 2,
296   saved_type_assertion = 3,
297   saved_state_alt = 4,
298   saved_state_repeater_count = 5,
299   saved_state_extra_block = 6,
300   saved_state_greedy_single_repeat = 7,
301   saved_state_rep_slow_dot = 8,
302   saved_state_rep_fast_dot = 9,
303   saved_state_rep_char = 10,
304   saved_state_rep_short_set = 11,
305   saved_state_rep_long_set = 12,
306   saved_state_non_greedy_long_repeat = 13,
307   saved_state_count = 14
308};
309
310#ifdef BOOST_MSVC
311#pragma warning(push)
312#pragma warning(disable : 4251 4231 4660)
313#endif
314
315template <class BidiIterator, class Allocator, class traits>
316class perl_matcher
317{
318public:
319   typedef typename traits::char_type char_type;
320   typedef perl_matcher<BidiIterator, Allocator, traits> self_type;
321   typedef bool (self_type::*matcher_proc_type)(void);
322   typedef typename traits::size_type traits_size_type;
323   typedef typename is_byte<char_type>::width_type width_type;
324   typedef typename regex_iterator_traits<BidiIterator>::difference_type difference_type;
325
326   perl_matcher(BidiIterator first, BidiIterator end,
327      match_results<BidiIterator, Allocator>& what,
328      const basic_regex<char_type, traits>& e,
329      match_flag_type f,
330      BidiIterator base);
331
332   bool match();
333   bool find();
334
335   void setf(match_flag_type f)
336   { m_match_flags |= f; }
337   void unsetf(match_flag_type f)
338   { m_match_flags &= ~f; }
339
340private:
341   void construct_init(const basic_regex<char_type, traits>& e, match_flag_type f);
342   bool find_imp();
343   bool match_imp();
344#ifdef BOOST_REGEX_HAS_MS_STACK_GUARD
345   typedef bool (perl_matcher::*protected_proc_type)();
346   bool protected_call(protected_proc_type);
347#endif
348   void estimate_max_state_count(std::random_access_iterator_tag*);
349   void estimate_max_state_count(void*);
350   bool match_prefix();
351   bool match_all_states();
352
353   // match procs, stored in s_match_vtable:
354   bool match_startmark();
355   bool match_endmark();
356   bool match_literal();
357   bool match_start_line();
358   bool match_end_line();
359   bool match_wild();
360   bool match_match();
361   bool match_word_boundary();
362   bool match_within_word();
363   bool match_word_start();
364   bool match_word_end();
365   bool match_buffer_start();
366   bool match_buffer_end();
367   bool match_backref();
368   bool match_long_set();
369   bool match_set();
370   bool match_jump();
371   bool match_alt();
372   bool match_rep();
373   bool match_combining();
374   bool match_soft_buffer_end();
375   bool match_restart_continue();
376   bool match_long_set_repeat();
377   bool match_set_repeat();
378   bool match_char_repeat();
379   bool match_dot_repeat_fast();
380   bool match_dot_repeat_slow();
381   bool match_backstep();
382   bool match_assert_backref();
383   bool match_toggle_case();
384#ifdef BOOST_REGEX_RECURSIVE
385   bool backtrack_till_match(std::size_t count);
386#endif
387
388   // find procs stored in s_find_vtable:
389   bool find_restart_any();
390   bool find_restart_word();
391   bool find_restart_line();
392   bool find_restart_buf();
393   bool find_restart_lit();
394
395private:
396   // final result structure to be filled in:
397   match_results<BidiIterator, Allocator>& m_result;
398   // temporary result for POSIX matches:
399   scoped_ptr<match_results<BidiIterator, Allocator> > m_temp_match;
400   // pointer to actual result structure to fill in:
401   match_results<BidiIterator, Allocator>* m_presult;
402   // start of sequence being searched:
403   BidiIterator base;
404   // end of sequence being searched:
405   BidiIterator last;
406   // current character being examined:
407   BidiIterator position;
408   // where to restart next search after failed match attempt:
409   BidiIterator restart;
410   // where the current search started from, acts as base for $` during grep:
411   BidiIterator search_base;
412   // how far we can go back when matching lookbehind:
413   BidiIterator backstop;
414   // the expression being examined:
415   const basic_regex<char_type, traits>& re;
416   // the expression's traits class:
417   const ::boost::regex_traits_wrapper<traits>& traits_inst;
418   // the next state in the machine being matched:
419   const re_syntax_base* pstate;
420   // matching flags in use:
421   match_flag_type m_match_flags;
422   // how many states we have examined so far:
423   difference_type state_count;
424   // max number of states to examine before giving up:
425   difference_type max_state_count;
426   // whether we should ignore case or not:
427   bool icase;
428   // set to true when (position == last), indicates that we may have a partial match:
429   bool m_has_partial_match;
430   // set to true whenever we get a match:
431   bool m_has_found_match;
432   // set to true whenever we're inside an independent sub-expression:
433   bool m_independent;
434   // the current repeat being examined:
435   repeater_count<BidiIterator>* next_count;
436   // the first repeat being examined (top of linked list):
437   repeater_count<BidiIterator> rep_obj;
438   // the mask to pass when matching word boundaries:
439   typename traits::char_class_type m_word_mask;
440   // the bitmask to use when determining whether a match_any matches a newline or not:
441   unsigned char match_any_mask;
442
443#ifdef BOOST_REGEX_NON_RECURSIVE
444   //
445   // additional members for non-recursive version:
446   //
447   typedef bool (self_type::*unwind_proc_type)(bool);
448
449   void extend_stack();
450   bool unwind(bool);
451   bool unwind_end(bool);
452   bool unwind_paren(bool);
453   bool unwind_recursion_stopper(bool);
454   bool unwind_assertion(bool);
455   bool unwind_alt(bool);
456   bool unwind_repeater_counter(bool);
457   bool unwind_extra_block(bool);
458   bool unwind_greedy_single_repeat(bool);
459   bool unwind_slow_dot_repeat(bool);
460   bool unwind_fast_dot_repeat(bool);
461   bool unwind_char_repeat(bool);
462   bool unwind_short_set_repeat(bool);
463   bool unwind_long_set_repeat(bool);
464   bool unwind_non_greedy_repeat(bool);
465   void destroy_single_repeat();
466   void push_matched_paren(int index, const sub_match<BidiIterator>& sub);
467   void push_recursion_stopper();
468   void push_assertion(const re_syntax_base* ps, bool positive);
469   void push_alt(const re_syntax_base* ps);
470   void push_repeater_count(int i, repeater_count<BidiIterator>** s);
471   void push_single_repeat(std::size_t c, const re_repeat* r, BidiIterator last_position, int id);
472   void push_non_greedy_repeat(const re_syntax_base* ps);
473
474
475   // pointer to base of stack:
476   saved_state* m_stack_base;
477   // pointer to current stack position:
478   saved_state* m_backup_state;
479   // determines what value to return when unwinding from recursion,
480   // allows for mixed recursive/non-recursive algorithm:
481   bool m_recursive_result;
482   // how many memory blocks have we used up?:
483   unsigned used_block_count;
484#endif
485
486   // these operations aren't allowed, so are declared private,
487   // bodies are provided to keep explicit-instantiation requests happy:
488   perl_matcher& operator=(const perl_matcher&)
489   {
490      return *this;
491   }
492   perl_matcher(const perl_matcher& that)
493      : m_result(that.m_result), re(that.re), traits_inst(that.traits_inst), rep_obj(0) {}
494};
495
496#ifdef BOOST_MSVC
497#pragma warning(pop)
498#endif
499
500} // namespace re_detail
501
502#ifdef BOOST_HAS_ABI_HEADERS
503#  include BOOST_ABI_SUFFIX
504#endif
505
506} // namespace boost
507
508//
509// include the implementation of perl_matcher:
510//
511#ifdef BOOST_REGEX_RECURSIVE
512#include <boost/regex/v4/perl_matcher_recursive.hpp>
513#else
514#include <boost/regex/v4/perl_matcher_non_recursive.hpp>
515#endif
516// this one has to be last:
517#include <boost/regex/v4/perl_matcher_common.hpp>
518
519#endif
520
Note: See TracBrowser for help on using the repository browser.