source: NonGTP/Xerces/xercesc/util/regx/RegxParser.hpp @ 188

Revision 188, 12.5 KB checked in by mattausch, 19 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 2001-2003 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 2001, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Id: RegxParser.hpp,v 1.8 2004/01/29 11:51:21 cargilld Exp $
59 */
60
61/*
62 *      A regular expression parser
63 */
64#if !defined(REGXPARSER_HPP)
65#define REGXPARSER_HPP
66
67// ---------------------------------------------------------------------------
68//  Includes
69// ---------------------------------------------------------------------------
70#include <xercesc/util/RefVectorOf.hpp>
71#include <xercesc/util/XMLUniDefs.hpp>
72#include <xercesc/util/Mutexes.hpp>
73
74XERCES_CPP_NAMESPACE_BEGIN
75
76// ---------------------------------------------------------------------------
77//  Forward Declaration
78// ---------------------------------------------------------------------------
79class Token;
80class RangeToken;
81class TokenFactory;
82
83class XMLUTIL_EXPORT RegxParser : public XMemory
84{
85public:
86
87        // -----------------------------------------------------------------------
88    //  Public constant data
89    // -----------------------------------------------------------------------
90    // Parse tokens
91        enum {
92                REGX_T_CHAR                     = 0,
93                REGX_T_EOF                      = 1,
94                REGX_T_OR                       = 2,
95                REGX_T_STAR                     = 3,
96                REGX_T_PLUS                     = 4,
97                REGX_T_QUESTION                 = 5,
98                REGX_T_LPAREN                   = 6,
99                REGX_T_RPAREN                   = 7,
100                REGX_T_DOT                      = 8,
101                REGX_T_LBRACKET                 = 9,
102                REGX_T_BACKSOLIDUS              = 10,
103                REGX_T_CARET                    = 11,
104                REGX_T_DOLLAR                   = 12,
105                REGX_T_LPAREN2                  = 13,
106                REGX_T_LOOKAHEAD                = 14,
107                REGX_T_NEGATIVELOOKAHEAD        = 15,
108                REGX_T_LOOKBEHIND               = 16,
109                REGX_T_NEGATIVELOOKBEHIND       = 17,
110                REGX_T_INDEPENDENT              = 18,
111                REGX_T_SET_OPERATIONS           = 19,
112                REGX_T_POSIX_CHARCLASS_START    = 20,
113                REGX_T_COMMENT                  = 21,
114                REGX_T_MODIFIERS                = 22,
115                REGX_T_CONDITION                = 23,
116                REGX_T_XMLSCHEMA_CC_SUBTRACTION = 24
117        };
118
119        static const unsigned short S_NORMAL;
120        static const unsigned short S_INBRACKETS;
121        static const unsigned short S_INXBRACKETS;
122
123        // -----------------------------------------------------------------------
124    //  Public Constructors and Destructor
125    // -----------------------------------------------------------------------
126        RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
127        virtual ~RegxParser();
128
129    // -----------------------------------------------------------------------
130    //  Getter methods
131    // -----------------------------------------------------------------------
132    unsigned short getParseContext() const;
133    unsigned short getState() const;
134    XMLInt32       getCharData() const;
135    int            getNoParen() const;
136        int            getOffset() const;
137        bool           hasBackReferences() const;
138    TokenFactory*  getTokenFactory() const;
139
140        // -----------------------------------------------------------------------
141    //  Setter methods
142    // -----------------------------------------------------------------------
143        void setParseContext(const unsigned short value);
144    void setTokenFactory(TokenFactory* const tokFactory);
145
146        // -----------------------------------------------------------------------
147    //  Public Parsing methods
148    // -----------------------------------------------------------------------
149        Token* parse(const XMLCh* const regxStr, const int options);
150
151protected:
152    // -----------------------------------------------------------------------
153    //  Protected Helper methods
154    // -----------------------------------------------------------------------
155    virtual bool        checkQuestion(const int off);
156        virtual XMLInt32    decodeEscaped();
157    MemoryManager*      getMemoryManager() const;
158    // -----------------------------------------------------------------------
159    //  Protected Parsing/Processing methods
160    // -----------------------------------------------------------------------
161        void                processNext();
162        Token*              parseRegx(const bool matchingRParen = false);
163        virtual Token*      processCaret();
164    virtual Token*      processDollar();
165        virtual Token*      processLook(const unsigned short tokType);
166    virtual Token*      processBacksolidus_A();
167    virtual Token*      processBacksolidus_z();
168    virtual Token*      processBacksolidus_Z();
169    virtual Token*      processBacksolidus_b();
170    virtual Token*      processBacksolidus_B();
171    virtual Token*      processBacksolidus_lt();
172    virtual Token*      processBacksolidus_gt();
173    virtual Token*      processBacksolidus_c();
174    virtual Token*      processBacksolidus_C();
175    virtual Token*      processBacksolidus_i();
176    virtual Token*      processBacksolidus_I();
177    virtual Token*      processBacksolidus_g();
178    virtual Token*      processBacksolidus_X();
179    virtual Token*      processBackReference();
180        virtual Token*      processStar(Token* const tok);
181        virtual Token*      processPlus(Token* const tok);
182        virtual Token*      processQuestion(Token* const tok);
183    virtual Token*      processParen();
184    virtual Token*      processParen2();
185    virtual Token*      processCondition();
186    virtual Token*      processModifiers();
187    virtual Token*      processIndependent();
188    virtual RangeToken* parseCharacterClass(const bool useNRange);
189    virtual RangeToken* parseSetOperations();
190        virtual XMLInt32    processCInCharacterClass(RangeToken* const tok,
191                                                 const XMLInt32 ch);
192    RangeToken*         processBacksolidus_pP(const XMLInt32 ch);
193
194    // -----------------------------------------------------------------------
195    //  Protected PreCreated RangeToken access methods
196    // -----------------------------------------------------------------------
197        virtual Token*      getTokenForShorthand(const XMLInt32 ch);
198
199private:
200    // -----------------------------------------------------------------------
201    //  Private parsing/processing methods
202    // -----------------------------------------------------------------------
203    Token* parseTerm(const bool matchingRParen = false);
204        Token* parseFactor();
205        Token* parseAtom();
206
207    // -----------------------------------------------------------------------
208    //  Unimplemented constructors and operators
209    // -----------------------------------------------------------------------
210    RegxParser(const RegxParser&);
211    RegxParser& operator=(const RegxParser&);
212
213        // -----------------------------------------------------------------------
214    //  Private data types
215    // -----------------------------------------------------------------------
216    class ReferencePosition : public XMemory
217    {
218        public :
219            ReferencePosition(const int refNo, const int position);
220
221            int fReferenceNo;
222                        int     fPosition;
223    };
224
225    // -----------------------------------------------------------------------
226    //  Private Helper methods
227    // -----------------------------------------------------------------------
228    bool isSet(const int flag);
229        int hexChar(const XMLInt32 ch);
230
231        // -----------------------------------------------------------------------
232    //  Private data members
233        // -----------------------------------------------------------------------
234    MemoryManager*                  fMemoryManager;
235        bool                            fHasBackReferences;
236        int                             fOptions;
237        int                             fOffset;
238        int                             fNoGroups;
239        unsigned short                  fParseContext;
240        int                             fStringLen;
241        unsigned short                  fState;
242        XMLInt32                        fCharData;
243        XMLCh*                          fString;
244        RefVectorOf<ReferencePosition>* fReferences;
245    TokenFactory*                   fTokenFactory;
246        XMLMutex                                                fMutex;
247};
248
249
250// ---------------------------------------------------------------------------
251//  RegxParser: Getter Methods
252// ---------------------------------------------------------------------------
253inline unsigned short RegxParser::getParseContext() const {
254
255    return fParseContext;
256}
257
258inline unsigned short RegxParser::getState() const {
259
260        return fState;
261}
262
263inline XMLInt32 RegxParser::getCharData() const {
264
265    return fCharData;
266}
267
268inline int RegxParser::getNoParen() const {
269
270    return fNoGroups;
271}
272
273inline int RegxParser::getOffset() const {
274
275        return fOffset;
276}
277
278inline bool RegxParser::hasBackReferences() const {
279
280        return fHasBackReferences;
281}
282
283inline TokenFactory* RegxParser::getTokenFactory() const {
284
285    return fTokenFactory;
286}
287
288inline MemoryManager* RegxParser::getMemoryManager() const {
289    return fMemoryManager;
290}
291// ---------------------------------------------------------------------------
292//  RegxParser: Setter Methods
293// ---------------------------------------------------------------------------
294inline void RegxParser::setParseContext(const unsigned short value) {
295
296        fParseContext = value;
297}
298
299inline void RegxParser::setTokenFactory(TokenFactory* const tokFactory) {
300
301    fTokenFactory = tokFactory;
302}
303
304// ---------------------------------------------------------------------------
305//  RegxParser: Helper Methods
306// ---------------------------------------------------------------------------
307inline bool RegxParser::isSet(const int flag) {
308
309    return (fOptions & flag) == flag;
310}
311
312
313inline int RegxParser::hexChar(const XMLInt32 ch) {
314
315        if (ch < chDigit_0 || ch > chLatin_f)
316                return -1;
317
318        if (ch <= chDigit_9)
319                return ch - chDigit_0;
320
321        if (ch < chLatin_A)
322                return -1;
323
324        if (ch <= chLatin_F)
325                return ch - chLatin_A + 10;
326
327        if (ch < chLatin_a)
328                return -1;
329
330        return ch - chLatin_a + 10;
331}
332
333XERCES_CPP_NAMESPACE_END
334
335#endif
336
337/**
338  *     End file RegxParser.hpp
339  */
340
Note: See TracBrowser for help on using the repository browser.