/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: RegxParser.hpp 568078 2007-08-21 11:43:25Z amassari $ */ /* * A regular expression parser */ #if !defined(REGXPARSER_HPP) #define REGXPARSER_HPP // --------------------------------------------------------------------------- // Includes // --------------------------------------------------------------------------- #include #include XERCES_CPP_NAMESPACE_BEGIN // --------------------------------------------------------------------------- // Forward Declaration // --------------------------------------------------------------------------- class Token; class RangeToken; class TokenFactory; class XMLUTIL_EXPORT RegxParser : public XMemory { public: // ----------------------------------------------------------------------- // Public constant data // ----------------------------------------------------------------------- // Parse tokens enum { REGX_T_CHAR = 0, REGX_T_EOF = 1, REGX_T_OR = 2, REGX_T_STAR = 3, REGX_T_PLUS = 4, REGX_T_QUESTION = 5, REGX_T_LPAREN = 6, REGX_T_RPAREN = 7, REGX_T_DOT = 8, REGX_T_LBRACKET = 9, REGX_T_BACKSOLIDUS = 10, REGX_T_CARET = 11, REGX_T_DOLLAR = 12, REGX_T_LPAREN2 = 13, REGX_T_LOOKAHEAD = 14, REGX_T_NEGATIVELOOKAHEAD = 15, REGX_T_LOOKBEHIND = 16, REGX_T_NEGATIVELOOKBEHIND = 17, REGX_T_INDEPENDENT = 18, REGX_T_SET_OPERATIONS = 19, REGX_T_POSIX_CHARCLASS_START = 20, REGX_T_COMMENT = 21, REGX_T_MODIFIERS = 22, REGX_T_CONDITION = 23, REGX_T_XMLSCHEMA_CC_SUBTRACTION = 24 }; static const unsigned short S_NORMAL; static const unsigned short S_INBRACKETS; static const unsigned short S_INXBRACKETS; // ----------------------------------------------------------------------- // Public Constructors and Destructor // ----------------------------------------------------------------------- RegxParser(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); virtual ~RegxParser(); // ----------------------------------------------------------------------- // Getter methods // ----------------------------------------------------------------------- unsigned short getParseContext() const; unsigned short getState() const; XMLInt32 getCharData() const; int getNoParen() const; int getOffset() const; bool hasBackReferences() const; TokenFactory* getTokenFactory() const; // ----------------------------------------------------------------------- // Setter methods // ----------------------------------------------------------------------- void setParseContext(const unsigned short value); void setTokenFactory(TokenFactory* const tokFactory); // ----------------------------------------------------------------------- // Public Parsing methods // ----------------------------------------------------------------------- Token* parse(const XMLCh* const regxStr, const int options); protected: // ----------------------------------------------------------------------- // Protected Helper methods // ----------------------------------------------------------------------- virtual bool checkQuestion(const int off); virtual XMLInt32 decodeEscaped(); MemoryManager* getMemoryManager() const; // ----------------------------------------------------------------------- // Protected Parsing/Processing methods // ----------------------------------------------------------------------- void processNext(); Token* parseRegx(const bool matchingRParen = false); virtual Token* processCaret(); virtual Token* processDollar(); virtual Token* processLook(const unsigned short tokType); virtual Token* processBacksolidus_A(); virtual Token* processBacksolidus_z(); virtual Token* processBacksolidus_Z(); virtual Token* processBacksolidus_b(); virtual Token* processBacksolidus_B(); virtual Token* processBacksolidus_lt(); virtual Token* processBacksolidus_gt(); virtual Token* processBacksolidus_c(); virtual Token* processBacksolidus_C(); virtual Token* processBacksolidus_i(); virtual Token* processBacksolidus_I(); virtual Token* processBacksolidus_g(); virtual Token* processBacksolidus_X(); virtual Token* processBackReference(); virtual Token* processStar(Token* const tok); virtual Token* processPlus(Token* const tok); virtual Token* processQuestion(Token* const tok); virtual Token* processParen(); virtual Token* processParen2(); virtual Token* processCondition(); virtual Token* processModifiers(); virtual Token* processIndependent(); virtual RangeToken* parseCharacterClass(const bool useNRange); virtual RangeToken* parseSetOperations(); virtual XMLInt32 processCInCharacterClass(RangeToken* const tok, const XMLInt32 ch); RangeToken* processBacksolidus_pP(const XMLInt32 ch); // ----------------------------------------------------------------------- // Protected PreCreated RangeToken access methods // ----------------------------------------------------------------------- virtual Token* getTokenForShorthand(const XMLInt32 ch); private: // ----------------------------------------------------------------------- // Private parsing/processing methods // ----------------------------------------------------------------------- Token* parseTerm(const bool matchingRParen = false); Token* parseFactor(); Token* parseAtom(); // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- RegxParser(const RegxParser&); RegxParser& operator=(const RegxParser&); // ----------------------------------------------------------------------- // Private data types // ----------------------------------------------------------------------- class ReferencePosition : public XMemory { public : ReferencePosition(const int refNo, const int position); int fReferenceNo; int fPosition; }; // ----------------------------------------------------------------------- // Private Helper methods // ----------------------------------------------------------------------- bool isSet(const int flag); int hexChar(const XMLInt32 ch); // ----------------------------------------------------------------------- // Private data members // ----------------------------------------------------------------------- MemoryManager* fMemoryManager; bool fHasBackReferences; int fOptions; int fOffset; int fNoGroups; unsigned short fParseContext; int fStringLen; unsigned short fState; XMLInt32 fCharData; XMLCh* fString; RefVectorOf* fReferences; TokenFactory* fTokenFactory; }; // --------------------------------------------------------------------------- // RegxParser: Getter Methods // --------------------------------------------------------------------------- inline unsigned short RegxParser::getParseContext() const { return fParseContext; } inline unsigned short RegxParser::getState() const { return fState; } inline XMLInt32 RegxParser::getCharData() const { return fCharData; } inline int RegxParser::getNoParen() const { return fNoGroups; } inline int RegxParser::getOffset() const { return fOffset; } inline bool RegxParser::hasBackReferences() const { return fHasBackReferences; } inline TokenFactory* RegxParser::getTokenFactory() const { return fTokenFactory; } inline MemoryManager* RegxParser::getMemoryManager() const { return fMemoryManager; } // --------------------------------------------------------------------------- // RegxParser: Setter Methods // --------------------------------------------------------------------------- inline void RegxParser::setParseContext(const unsigned short value) { fParseContext = value; } inline void RegxParser::setTokenFactory(TokenFactory* const tokFactory) { fTokenFactory = tokFactory; } // --------------------------------------------------------------------------- // RegxParser: Helper Methods // --------------------------------------------------------------------------- inline bool RegxParser::isSet(const int flag) { return (fOptions & flag) == flag; } inline int RegxParser::hexChar(const XMLInt32 ch) { if (ch < chDigit_0 || ch > chLatin_f) return -1; if (ch <= chDigit_9) return ch - chDigit_0; if (ch < chLatin_A) return -1; if (ch <= chLatin_F) return ch - chLatin_A + 10; if (ch < chLatin_a) return -1; return ch - chLatin_a + 10; } XERCES_CPP_NAMESPACE_END #endif /** * End file RegxParser.hpp */