Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

cpp_re2c_lexer.hpp @ 857

Revision 857, 12.9 KB checked in by igarcia, 19 years ago (diff)

Line
1	/*=============================================================================
2	Boost.Wave: A Standard compliant C++ preprocessor library
3
4	Re2C based C++ lexer
5
6	http://www.boost.org/
7
8	Copyright (c) 2001-2005 Hartmut Kaiser. Distributed under the Boost
9	Software License, Version 1.0. (See accompanying file
10	LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
11	=============================================================================*/
12
13	#if !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)
14	#define CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED
15
16	#include <string>
17	#include <cstdio>
18	#include <cstdarg>
19	#if defined(BOOST_SPIRIT_DEBUG)
20	#include <iostream>
21	#endif // defined(BOOST_SPIRIT_DEBUG)
22
23	#include <boost/concept_check.hpp>
24	#include <boost/assert.hpp>
25	#include <boost/spirit/core.hpp>
26
27	#include <boost/wave/wave_config.hpp>
28	#include <boost/wave/language_support.hpp>
29	#include <boost/wave/token_ids.hpp>
30	#include <boost/wave/util/file_position.hpp>
31	#include <boost/wave/cpplexer/validate_universal_char.hpp>
32	#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
33	#include <boost/wave/cpplexer/token_cache.hpp>
34	#include <boost/wave/cpplexer/convert_trigraphs.hpp>
35
36	#include <boost/wave/cpplexer/cpp_lex_token.hpp>
37	#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
38	#include <boost/wave/cpplexer/re2clex/scanner.hpp>
39	#include <boost/wave/cpplexer/re2clex/cpp_re.hpp>
40
41	///////////////////////////////////////////////////////////////////////////////
42	namespace boost {
43	namespace wave {
44	namespace cpplexer {
45	namespace re2clex {
46
47	///////////////////////////////////////////////////////////////////////////////
48	//
49	// encapsulation of the re2c based cpp lexer
50	//
51	///////////////////////////////////////////////////////////////////////////////
52
53	template <typename IteratorT, typename PositionT = boost::wave::util::file_position_type>
54	class lexer
55	{
56	public:
57
58	typedef char char_t;
59	typedef Scanner base_t;
60	typedef lex_token<PositionT> token_type;
61	typedef typename token_type::string_type string_type;
62
63	lexer(IteratorT const &first, IteratorT const &last,
64	PositionT const &pos, boost::wave::language_support language);
65	~lexer();
66
67	lex_token<PositionT> get();
68	void set_position(PositionT const &pos)
69	{
70	// set position has to change the file name and line number only
71	filename = pos.get_file();
72	scanner.line = pos.get_line();
73	// scanner.column = scanner.curr_column = pos.get_column();
74	scanner.file_name = filename.c_str();
75	}
76
77	// error reporting from the re2c generated lexer
78	static int report_error(Scanner const* s, char const *, ...);
79
80	private:
81	static char const *tok_names[];
82
83	Scanner scanner;
84	string_type filename;
85	string_type value;
86	bool at_eof;
87	boost::wave::language_support language;
88
89	static token_cache<string_type> const cache;
90	};
91
92	///////////////////////////////////////////////////////////////////////////////
93	// initialize cpp lexer
94	template <typename IteratorT, typename PositionT>
95	inline
96	lexer<IteratorT, PositionT>::lexer(IteratorT const &first,
97	IteratorT const &last, PositionT const &pos,
98	boost::wave::language_support language)
99	: filename(pos.get_file()), at_eof(false), language(language)
100	{
101	using namespace std; // some systems have memset in std
102	memset(&scanner, '\0', sizeof(Scanner));
103	scanner.fd = -1;
104	scanner.eol_offsets = aq_create();
105	scanner.first = scanner.act = (uchar )&(first);
106	scanner.last = scanner.first + std::distance(first, last);
107	scanner.line = pos.get_line();
108	scanner.column = scanner.curr_column = pos.get_column();
109	scanner.error_proc = report_error;
110	scanner.file_name = filename.c_str();
111
112	#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
113	scanner.enable_ms_extensions = 1;
114	#else
115	scanner.enable_ms_extensions = 0;
116	#endif
117
118	#if BOOST_WAVE_SUPPORT_VARIADICS_PLACEMARKERS != 0
119	scanner.act_in_c99_mode = boost::wave::need_c99(language);
120	#endif
121	}
122
123	template <typename IteratorT, typename PositionT>
124	inline
125	lexer<IteratorT, PositionT>::~lexer()
126	{
127	using namespace std; // some systems have memset in std
128	aq_terminate(scanner.eol_offsets);
129	free(scanner.bot);
130	}
131
132	///////////////////////////////////////////////////////////////////////////////
133	// get the next token from the input stream
134	template <typename IteratorT, typename PositionT>
135	inline lex_token<PositionT>
136	lexer<IteratorT, PositionT>::get()
137	{
138	if (at_eof)
139	return lex_token<PositionT>(); // return T_EOI
140
141	unsigned int actline = scanner.line;
142	token_id id = token_id(scan(&scanner));
143
144	switch (static_cast<unsigned int>(id)) {
145	case T_IDENTIFIER:
146	// test identifier characters for validity (throws if invalid chars found)
147	value = string_type((char const *)scanner.tok,
148	scanner.cur-scanner.tok);
149	if (!(language & support_option_no_character_validation))
150	impl::validate_identifier_name(value, actline, scanner.column, filename);
151	break;
152
153	case T_STRINGLIT:
154	case T_CHARLIT:
155	// test literal characters for validity (throws if invalid chars found)
156	value = string_type((char const *)scanner.tok,
157	scanner.cur-scanner.tok);
158	if (language & support_option_convert_trigraphs)
159	value = impl::convert_trigraphs(value, actline, scanner.column, filename);
160	if (!(language & support_option_no_character_validation))
161	impl::validate_literal(value, actline, scanner.column, filename);
162	break;
163
164	#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
165	case T_PP_HHEADER:
166	case T_PP_QHEADER:
167	case T_PP_INCLUDE:
168	// convert to the corresponding ..._next token, if appropriate
169	{
170	value = string_type((char const *)scanner.tok,
171	scanner.cur-scanner.tok);
172
173	// Skip '#' and whitespace and see whether we find an 'include_next' here.
174	typename string_type::size_type start = value.find("include");
175	if (value.compare(start, 12, "include_next", 12) == 0)
176	id = token_id(id \| AltTokenType);
177	break;
178	}
179	#endif
180
181	case T_LONGINTLIT: // supported in C99 and long_long mode
182	value = string_type((char const *)scanner.tok,
183	scanner.cur-scanner.tok);
184	if (!boost::wave::need_long_long(language)) {
185	// syntax error: not allowed in C++ mode
186	BOOST_WAVE_LEXER_THROW(lexing_exception, invalid_long_long_literal,
187	value.c_str(), actline, scanner.column, filename.c_str());
188	}
189	break;
190
191	case T_OCTALINT:
192	case T_DECIMALINT:
193	case T_HEXAINT:
194	case T_INTLIT:
195	case T_FLOATLIT:
196	case T_FIXEDPOINTLIT:
197	case T_CCOMMENT:
198	case T_CPPCOMMENT:
199	case T_SPACE:
200	case T_SPACE2:
201	case T_ANY:
202	value = string_type((char const *)scanner.tok,
203	scanner.cur-scanner.tok);
204	break;
205
206	case T_EOF:
207	// T_EOF is returned as a valid token, the next call will return T_EOI,
208	// i.e. the actual end of input
209	at_eof = true;
210	value.clear();
211	break;
212
213	case T_OR_TRIGRAPH:
214	case T_XOR_TRIGRAPH:
215	case T_LEFTBRACE_TRIGRAPH:
216	case T_RIGHTBRACE_TRIGRAPH:
217	case T_LEFTBRACKET_TRIGRAPH:
218	case T_RIGHTBRACKET_TRIGRAPH:
219	case T_COMPL_TRIGRAPH:
220	case T_POUND_TRIGRAPH:
221	if (language & support_option_convert_trigraphs) {
222	value = cache.get_token_value(BASEID_FROM_TOKEN(id));
223	}
224	else {
225	value = string_type((char const *)scanner.tok,
226	scanner.cur-scanner.tok);
227	}
228	break;
229
230	case T_ANY_TRIGRAPH:
231	if (language & support_option_convert_trigraphs) {
232	value = impl::convert_trigraph(
233	string_type((char const *)scanner.tok,
234	scanner.cur-scanner.tok),
235	actline, scanner.column, filename);
236	}
237	else {
238	value = string_type((char const *)scanner.tok,
239	scanner.cur-scanner.tok);
240	}
241	break;
242
243	default:
244	if (CATEGORY_FROM_TOKEN(id) != EXTCATEGORY_FROM_TOKEN(id) \|\|
245	IS_CATEGORY(id, UnknownTokenType))
246	{
247	value = string_type((char const *)scanner.tok,
248	scanner.cur-scanner.tok);
249	}
250	else {
251	value = cache.get_token_value(id);
252	}
253	break;
254	}
255
256	// the re2c lexer reports the new line number for newline tokens
257	return lex_token<PositionT>(id, value,
258	PositionT(filename, actline, scanner.column));
259	}
260
261	template <typename IteratorT, typename PositionT>
262	inline int
263	lexer<IteratorT, PositionT>::report_error(Scanner const s, char const msg, ...)
264	{
265	BOOST_ASSERT(0 != s);
266	BOOST_ASSERT(0 != msg);
267
268	using namespace std; // some system have vsprintf in namespace std
269
270	char buffer[200]; // should be large enough
271	va_list params;
272	va_start(params, msg);
273	vsprintf(buffer, msg, params);
274	va_end(params);
275
276	BOOST_WAVE_LEXER_THROW(lexing_exception, generic_lexing_error, buffer,
277	s->line, s->column, s->file_name);
278	BOOST_UNREACHABLE_RETURN(0);
279	}
280
281	///////////////////////////////////////////////////////////////////////////////
282	//
283	// lex_functor
284	//
285	///////////////////////////////////////////////////////////////////////////////
286
287	template <typename IteratorT, typename PositionT = boost::wave::util::file_position_type>
288	class lex_functor
289	: public lex_input_interface<typename lexer<IteratorT, PositionT>::token_type>
290	{
291	public:
292
293	typedef typename lexer<IteratorT, PositionT>::token_type token_type;
294
295	lex_functor(IteratorT const &first, IteratorT const &last,
296	PositionT const &pos, boost::wave::language_support language)
297	: lexer(first, last, pos, language)
298	{}
299	virtual ~lex_functor() {}
300
301	// get the next token from the input stream
302	token_type get() { return lexer.get(); }
303	void set_position(PositionT const &pos)
304	{ lexer.set_position(pos); }
305
306	private:
307	lexer<IteratorT, PositionT> lexer;
308	};
309
310	///////////////////////////////////////////////////////////////////////////////
311	template <typename IteratorT, typename PositionT>
312	token_cache<typename lexer<IteratorT, PositionT>::string_type> const
313	lexer<IteratorT, PositionT>::cache =
314	token_cache<typename lexer<IteratorT, PositionT>::string_type>();
315
316	} // namespace re2clex
317
318	///////////////////////////////////////////////////////////////////////////////
319	//
320	// The new_lexer_gen<>::new_lexer function (declared in cpp_lex_interface.hpp)
321	// should be defined inline, if the lex_functor shouldn't be instantiated
322	// separately from the lex_iterator.
323	//
324	// Separate (explicit) instantiation helps to reduce compilation time.
325	//
326	///////////////////////////////////////////////////////////////////////////////
327
328	#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
329	#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE
330	#else
331	#define BOOST_WAVE_RE2C_NEW_LEXER_INLINE inline
332	#endif
333
334	///////////////////////////////////////////////////////////////////////////////
335	//
336	// The 'new_lexer' function allows the opaque generation of a new lexer object.
337	// It is coupled to the iterator type to allow to decouple the lexer/iterator
338	// configurations at compile time.
339	//
340	// This function is declared inside the cpp_lex_interface.hpp file, which is
341	// referenced by the source file calling the lexer and the source file, which
342	// instantiates the lex_functor. But it is defined here, so it will be
343	// instantiated only while compiling the source file, which instantiates the
344	// lex_functor. While the cpp_lex_interface.hpp file may be included everywhere,
345	// this file (cpp_re2c_lexer.hpp) should be included only once. This allows
346	// to decouple the lexer interface from the lexer implementation and reduces
347	// compilation time.
348	//
349	///////////////////////////////////////////////////////////////////////////////
350
351	template <typename IteratorT, typename PositionT>
352	BOOST_WAVE_RE2C_NEW_LEXER_INLINE
353	lex_input_interface<lex_token<PositionT> > *
354	new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
355	IteratorT const &last, PositionT const &pos,
356	boost::wave::language_support language)
357	{
358	return new re2clex::lex_functor<IteratorT, PositionT>(first, last, pos,
359	language);
360	}
361
362	#undef BOOST_WAVE_RE2C_NEW_LEXER_INLINE
363
364	///////////////////////////////////////////////////////////////////////////////
365	} // namespace cpplexer
366	} // namespace wave
367	} // namespace boost
368
369	#endif // !defined(CPP_RE2C_LEXER_HPP_B81A2629_D5B1_4944_A97D_60254182B9A8_INCLUDED)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: NonGTP/Boost/boost/wave/cpplexer/re2clex/cpp_re2c_lexer.hpp @ 857

Download in other formats: