[188] | 1 | /*
|
---|
| 2 | * The Apache Software License, Version 1.1
|
---|
| 3 | *
|
---|
| 4 | * Copyright (c) 2001-2003 The Apache Software Foundation. All rights
|
---|
| 5 | * reserved.
|
---|
| 6 | *
|
---|
| 7 | * Redistribution and use in source and binary forms, with or without
|
---|
| 8 | * modification, are permitted provided that the following conditions
|
---|
| 9 | * are met:
|
---|
| 10 | *
|
---|
| 11 | * 1. Redistributions of source code must retain the above copyright
|
---|
| 12 | * notice, this list of conditions and the following disclaimer.
|
---|
| 13 | *
|
---|
| 14 | * 2. Redistributions in binary form must reproduce the above copyright
|
---|
| 15 | * notice, this list of conditions and the following disclaimer in
|
---|
| 16 | * the documentation and/or other materials provided with the
|
---|
| 17 | * distribution.
|
---|
| 18 | *
|
---|
| 19 | * 3. The end-user documentation included with the redistribution,
|
---|
| 20 | * if any, must include the following acknowledgment:
|
---|
| 21 | * "This product includes software developed by the
|
---|
| 22 | * Apache Software Foundation (http://www.apache.org/)."
|
---|
| 23 | * Alternately, this acknowledgment may appear in the software itself,
|
---|
| 24 | * if and wherever such third-party acknowledgments normally appear.
|
---|
| 25 | *
|
---|
| 26 | * 4. The names "Xerces" and "Apache Software Foundation" must
|
---|
| 27 | * not be used to endorse or promote products derived from this
|
---|
| 28 | * software without prior written permission. For written
|
---|
| 29 | * permission, please contact apache\@apache.org.
|
---|
| 30 | *
|
---|
| 31 | * 5. Products derived from this software may not be called "Apache",
|
---|
| 32 | * nor may "Apache" appear in their name, without prior written
|
---|
| 33 | * permission of the Apache Software Foundation.
|
---|
| 34 | *
|
---|
| 35 | * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
|
---|
| 36 | * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
---|
| 37 | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
---|
| 38 | * DISCLAIMED. IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
|
---|
| 39 | * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
---|
| 40 | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
---|
| 41 | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
|
---|
| 42 | * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
---|
| 43 | * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
---|
| 44 | * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
|
---|
| 45 | * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
---|
| 46 | * SUCH DAMAGE.
|
---|
| 47 | * ====================================================================
|
---|
| 48 | *
|
---|
| 49 | * This software consists of voluntary contributions made by many
|
---|
| 50 | * individuals on behalf of the Apache Software Foundation, and was
|
---|
| 51 | * originally based on software copyright (c) 2001, International
|
---|
| 52 | * Business Machines, Inc., http://www.ibm.com . For more information
|
---|
| 53 | * on the Apache Software Foundation, please see
|
---|
| 54 | * <http://www.apache.org/>.
|
---|
| 55 | */
|
---|
| 56 |
|
---|
| 57 | /*
|
---|
| 58 | * $Id: RegularExpression.hpp,v 1.17 2004/01/13 20:05:00 peiyongz Exp $
|
---|
| 59 | */
|
---|
| 60 |
|
---|
| 61 | #if !defined(REGULAREXPRESSION_HPP)
|
---|
| 62 | #define REGULAREXPRESSION_HPP
|
---|
| 63 |
|
---|
| 64 | // ---------------------------------------------------------------------------
|
---|
| 65 | // Includes
|
---|
| 66 | // ---------------------------------------------------------------------------
|
---|
| 67 | #include <xercesc/util/RefArrayVectorOf.hpp>
|
---|
| 68 | #include <xercesc/util/XMLString.hpp>
|
---|
| 69 | #include <xercesc/util/Janitor.hpp>
|
---|
| 70 | #include <xercesc/util/Mutexes.hpp>
|
---|
| 71 | #include <xercesc/util/regx/Op.hpp>
|
---|
| 72 | #include <xercesc/util/regx/TokenFactory.hpp>
|
---|
| 73 | #include <xercesc/util/regx/BMPattern.hpp>
|
---|
| 74 | #include <xercesc/util/regx/ModifierToken.hpp>
|
---|
| 75 | #include <xercesc/util/regx/ConditionToken.hpp>
|
---|
| 76 | #include <xercesc/util/regx/OpFactory.hpp>
|
---|
| 77 |
|
---|
| 78 | XERCES_CPP_NAMESPACE_BEGIN
|
---|
| 79 |
|
---|
| 80 | // ---------------------------------------------------------------------------
|
---|
| 81 | // Forward Declaration
|
---|
| 82 | // ---------------------------------------------------------------------------
|
---|
| 83 | class RangeToken;
|
---|
| 84 | class Match;
|
---|
| 85 |
|
---|
| 86 | class XMLUTIL_EXPORT RegularExpression : public XMemory
|
---|
| 87 | {
|
---|
| 88 | public:
|
---|
| 89 | // -----------------------------------------------------------------------
|
---|
| 90 | // Public Constructors and Destructor
|
---|
| 91 | // -----------------------------------------------------------------------
|
---|
| 92 | RegularExpression
|
---|
| 93 | (
|
---|
| 94 | const char* const pattern
|
---|
| 95 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
|
---|
| 96 | );
|
---|
| 97 | RegularExpression
|
---|
| 98 | (
|
---|
| 99 | const char* const pattern
|
---|
| 100 | , const char* const options
|
---|
| 101 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
|
---|
| 102 | );
|
---|
| 103 | RegularExpression
|
---|
| 104 | (
|
---|
| 105 | const XMLCh* const pattern
|
---|
| 106 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
|
---|
| 107 | );
|
---|
| 108 | RegularExpression
|
---|
| 109 | (
|
---|
| 110 | const XMLCh* const pattern
|
---|
| 111 | , const XMLCh* const options
|
---|
| 112 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
|
---|
| 113 | );
|
---|
| 114 | ~RegularExpression();
|
---|
| 115 |
|
---|
| 116 | // -----------------------------------------------------------------------
|
---|
| 117 | // Public Constants
|
---|
| 118 | // -----------------------------------------------------------------------
|
---|
| 119 | static const unsigned int MARK_PARENS;
|
---|
| 120 | static const unsigned int IGNORE_CASE;
|
---|
| 121 | static const unsigned int SINGLE_LINE;
|
---|
| 122 | static const unsigned int MULTIPLE_LINE;
|
---|
| 123 | static const unsigned int EXTENDED_COMMENT;
|
---|
| 124 | static const unsigned int USE_UNICODE_CATEGORY;
|
---|
| 125 | static const unsigned int UNICODE_WORD_BOUNDARY;
|
---|
| 126 | static const unsigned int PROHIBIT_HEAD_CHARACTER_OPTIMIZATION;
|
---|
| 127 | static const unsigned int PROHIBIT_FIXED_STRING_OPTIMIZATION;
|
---|
| 128 | static const unsigned int XMLSCHEMA_MODE;
|
---|
| 129 | static const unsigned int SPECIAL_COMMA;
|
---|
| 130 | static const unsigned short WT_IGNORE;
|
---|
| 131 | static const unsigned short WT_LETTER;
|
---|
| 132 | static const unsigned short WT_OTHER;
|
---|
| 133 |
|
---|
| 134 | // -----------------------------------------------------------------------
|
---|
| 135 | // Public Helper methods
|
---|
| 136 | // -----------------------------------------------------------------------
|
---|
| 137 | static int getOptionValue(const XMLCh ch);
|
---|
| 138 |
|
---|
| 139 | // -----------------------------------------------------------------------
|
---|
| 140 | // Matching methods
|
---|
| 141 | // -----------------------------------------------------------------------
|
---|
| 142 | bool matches(const char* const matchString, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 143 | bool matches(const char* const matchString, const int start,
|
---|
| 144 | const int end, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 145 | bool matches(const char* const matchString, Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 146 | bool matches(const char* const matchString, const int start,
|
---|
| 147 | const int end, Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 148 |
|
---|
| 149 | bool matches(const XMLCh* const matchString, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 150 | bool matches(const XMLCh* const matchString, const int start,
|
---|
| 151 | const int end, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 152 | bool matches(const XMLCh* const matchString, Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 153 | bool matches(const XMLCh* const matchString, const int start,
|
---|
| 154 | const int end, Match* const pMatch, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 155 |
|
---|
| 156 | // -----------------------------------------------------------------------
|
---|
| 157 | // Tokenize methods
|
---|
| 158 | // -----------------------------------------------------------------------
|
---|
| 159 | // Note: The caller owns the string vector that is returned, and is responsible
|
---|
| 160 | // for deleting it.
|
---|
| 161 | RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString);
|
---|
| 162 | RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const int start,
|
---|
| 163 | const int end);
|
---|
| 164 |
|
---|
| 165 | RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString);
|
---|
| 166 | RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
|
---|
| 167 | const int start, const int end);
|
---|
| 168 |
|
---|
| 169 | // -----------------------------------------------------------------------
|
---|
| 170 | // Replace methods
|
---|
| 171 | // -----------------------------------------------------------------------
|
---|
| 172 | // Note: The caller owns the XMLCh* that is returned, and is responsible for
|
---|
| 173 | // deleting it.
|
---|
| 174 | XMLCh *replace(const char* const matchString, const char* const replaceString);
|
---|
| 175 | XMLCh *replace(const char* const matchString, const char* const replaceString,
|
---|
| 176 | const int start, const int end);
|
---|
| 177 |
|
---|
| 178 | XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString);
|
---|
| 179 | XMLCh *replace(const XMLCh* const matchString, const XMLCh* const replaceString,
|
---|
| 180 | const int start, const int end);
|
---|
| 181 |
|
---|
| 182 | private:
|
---|
| 183 | // -----------------------------------------------------------------------
|
---|
| 184 | // Private data types
|
---|
| 185 | // -----------------------------------------------------------------------
|
---|
| 186 | class XMLUTIL_EXPORT Context : public XMemory
|
---|
| 187 | {
|
---|
| 188 | public :
|
---|
| 189 | Context(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
|
---|
| 190 | ~Context();
|
---|
| 191 |
|
---|
| 192 | inline const XMLCh* getString() const { return fString; }
|
---|
| 193 | void reset(const XMLCh* const string, const int stringLen,
|
---|
| 194 | const int start, const int limit, const int noClosures);
|
---|
| 195 | bool nextCh(XMLInt32& ch, int& offset, const short direction);
|
---|
| 196 |
|
---|
| 197 | bool fAdoptMatch;
|
---|
| 198 | int fStart;
|
---|
| 199 | int fLimit;
|
---|
| 200 | int fLength; // fLimit - fStart
|
---|
| 201 | int fSize;
|
---|
| 202 | int fStringMaxLen;
|
---|
| 203 | int* fOffsets;
|
---|
| 204 | Match* fMatch;
|
---|
| 205 | XMLCh* fString;
|
---|
| 206 | MemoryManager* fMemoryManager;
|
---|
| 207 |
|
---|
| 208 | friend class Janitor<Context>;
|
---|
| 209 | };
|
---|
| 210 |
|
---|
| 211 | // -----------------------------------------------------------------------
|
---|
| 212 | // Unimplemented constructors and operators
|
---|
| 213 | // -----------------------------------------------------------------------
|
---|
| 214 | RegularExpression(const RegularExpression&);
|
---|
| 215 | RegularExpression& operator=(const RegularExpression&);
|
---|
| 216 |
|
---|
| 217 | // -----------------------------------------------------------------------
|
---|
| 218 | // Cleanup methods
|
---|
| 219 | // -----------------------------------------------------------------------
|
---|
| 220 | void cleanUp();
|
---|
| 221 |
|
---|
| 222 | // -----------------------------------------------------------------------
|
---|
| 223 | // Setter methods
|
---|
| 224 | // -----------------------------------------------------------------------
|
---|
| 225 | void setPattern(const XMLCh* const pattern, const XMLCh* const options=0);
|
---|
| 226 |
|
---|
| 227 | // -----------------------------------------------------------------------
|
---|
| 228 | // Private Helper methods
|
---|
| 229 | // -----------------------------------------------------------------------
|
---|
| 230 | void prepare();
|
---|
| 231 | int parseOptions(const XMLCh* const options);
|
---|
| 232 | bool isSet(const int options, const int flag);
|
---|
| 233 | unsigned short getWordType(const XMLCh* const target, const int begin,
|
---|
| 234 | const int end, const int offset);
|
---|
| 235 | unsigned short getCharType(const XMLCh ch);
|
---|
| 236 | unsigned short getPreviousWordType(const XMLCh* const target,
|
---|
| 237 | const int start, const int end,
|
---|
| 238 | int offset);
|
---|
| 239 |
|
---|
| 240 | /**
|
---|
| 241 | * Matching helpers
|
---|
| 242 | */
|
---|
| 243 | int match(Context* const context, const Op* const operations, int offset,
|
---|
| 244 | const short direction);
|
---|
| 245 | bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2);
|
---|
| 246 |
|
---|
| 247 | /**
|
---|
| 248 | * Helper methods used by match(Context* ...)
|
---|
| 249 | */
|
---|
| 250 | bool matchChar(Context* const context, const XMLInt32 ch, int& offset,
|
---|
| 251 | const short direction, const bool ignoreCase);
|
---|
| 252 | bool matchDot(Context* const context, int& offset, const short direction);
|
---|
| 253 | bool matchRange(Context* const context, const Op* const op,
|
---|
| 254 | int& offset, const short direction, const bool ignoreCase);
|
---|
| 255 | bool matchAnchor(Context* const context, const XMLInt32 ch,
|
---|
| 256 | const int offset);
|
---|
| 257 | bool matchBackReference(Context* const context, const XMLInt32 ch,
|
---|
| 258 | int& offset, const short direction,
|
---|
| 259 | const bool ignoreCase);
|
---|
| 260 | bool matchString(Context* const context, const XMLCh* const literal,
|
---|
| 261 | int& offset, const short direction, const bool ignoreCase);
|
---|
| 262 | int matchUnion(Context* const context, const Op* const op, int offset,
|
---|
| 263 | const short direction);
|
---|
| 264 | int matchCapture(Context* const context, const Op* const op, int offset,
|
---|
| 265 | const short direction);
|
---|
| 266 | bool matchCondition(Context* const context, const Op* const op, int offset,
|
---|
| 267 | const short direction);
|
---|
| 268 | int matchModifier(Context* const context, const Op* const op, int offset,
|
---|
| 269 | const short direction);
|
---|
| 270 |
|
---|
| 271 | /**
|
---|
| 272 | * Tokenize helper
|
---|
| 273 | *
|
---|
| 274 | * This overloaded tokenize is for internal use only. It provides a way to
|
---|
| 275 | * keep track of the sub-expressions in each match of the pattern.
|
---|
| 276 | *
|
---|
| 277 | * It is called by the other tokenize methods, and by the replace method.
|
---|
| 278 | * The caller is responsible for the deletion of the returned
|
---|
| 279 | * RefArrayVectorOf<XMLCh*>
|
---|
| 280 | */
|
---|
| 281 | RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
|
---|
| 282 | const int start, const int end,
|
---|
| 283 | RefVectorOf<Match> *subEx);
|
---|
| 284 | /**
|
---|
| 285 | * Replace helpers
|
---|
| 286 | *
|
---|
| 287 | * Note: the caller owns the XMLCh* that is returned
|
---|
| 288 | */
|
---|
| 289 | const XMLCh *subInExp(const XMLCh* const repString,
|
---|
| 290 | const XMLCh* const origString,
|
---|
| 291 | const Match* subEx);
|
---|
| 292 | /**
|
---|
| 293 | * Converts a token tree into an operation tree
|
---|
| 294 | */
|
---|
| 295 | void compile(const Token* const token);
|
---|
| 296 | Op* compile(const Token* const token, Op* const next,
|
---|
| 297 | const bool reverse);
|
---|
| 298 | /**
|
---|
| 299 | * Helper methods used by compile
|
---|
| 300 | */
|
---|
| 301 | Op* compileSingle(const Token* const token, Op* const next,
|
---|
| 302 | const unsigned short tokType);
|
---|
| 303 | Op* compileUnion(const Token* const token, Op* const next,
|
---|
| 304 | const bool reverse);
|
---|
| 305 | Op* compileCondition(const Token* const token, Op* const next,
|
---|
| 306 | const bool reverse);
|
---|
| 307 | Op* compileParenthesis(const Token* const token, Op* const next,
|
---|
| 308 | const bool reverse);
|
---|
| 309 | Op* compileLook(const Token* const token, const Op* const next,
|
---|
| 310 | const bool reverse, const unsigned short tokType);
|
---|
| 311 | Op* compileConcat(const Token* const token, Op* const next,
|
---|
| 312 | const bool reverse);
|
---|
| 313 | Op* compileClosure(const Token* const token, Op* const next,
|
---|
| 314 | const bool reverse, const unsigned short tokType);
|
---|
| 315 |
|
---|
| 316 | // -----------------------------------------------------------------------
|
---|
| 317 | // Private data members
|
---|
| 318 | // -----------------------------------------------------------------------
|
---|
| 319 | bool fHasBackReferences;
|
---|
| 320 | bool fFixedStringOnly;
|
---|
| 321 | int fNoGroups;
|
---|
| 322 | int fMinLength;
|
---|
| 323 | int fNoClosures;
|
---|
| 324 | unsigned int fOptions;
|
---|
| 325 | BMPattern* fBMPattern;
|
---|
| 326 | XMLCh* fPattern;
|
---|
| 327 | XMLCh* fFixedString;
|
---|
| 328 | Op* fOperations;
|
---|
| 329 | Token* fTokenTree;
|
---|
| 330 | RangeToken* fFirstChar;
|
---|
| 331 | static RangeToken* fWordRange;
|
---|
| 332 | OpFactory fOpFactory;
|
---|
| 333 | XMLMutex fMutex;
|
---|
| 334 | TokenFactory* fTokenFactory;
|
---|
| 335 | MemoryManager* fMemoryManager;
|
---|
| 336 | };
|
---|
| 337 |
|
---|
| 338 |
|
---|
| 339 | // ---------------------------------------------------------------------------
|
---|
| 340 | // RegularExpression: Cleanup methods
|
---|
| 341 | // ---------------------------------------------------------------------------
|
---|
| 342 | inline void RegularExpression::cleanUp() {
|
---|
| 343 |
|
---|
| 344 | fMemoryManager->deallocate(fPattern);//delete [] fPattern;
|
---|
| 345 | fMemoryManager->deallocate(fFixedString);//delete [] fFixedString;
|
---|
| 346 | delete fBMPattern;
|
---|
| 347 | delete fTokenFactory;
|
---|
| 348 | }
|
---|
| 349 |
|
---|
| 350 | // ---------------------------------------------------------------------------
|
---|
| 351 | // RegularExpression: Helper methods
|
---|
| 352 | // ---------------------------------------------------------------------------
|
---|
| 353 | inline bool RegularExpression::isSet(const int options, const int flag) {
|
---|
| 354 |
|
---|
| 355 | return (options & flag) == flag;
|
---|
| 356 | }
|
---|
| 357 |
|
---|
| 358 | inline Op* RegularExpression::compileLook(const Token* const token,
|
---|
| 359 | const Op* const next,
|
---|
| 360 | const bool reverse,
|
---|
| 361 | const unsigned short tokType) {
|
---|
| 362 |
|
---|
| 363 | Op* ret = 0;
|
---|
| 364 | Op* result = compile(token->getChild(0), 0, reverse);
|
---|
| 365 |
|
---|
| 366 | switch(tokType) {
|
---|
| 367 | case Token::T_LOOKAHEAD:
|
---|
| 368 | ret = fOpFactory.createLookOp(Op::O_LOOKAHEAD, next, result);
|
---|
| 369 | break;
|
---|
| 370 | case Token::T_NEGATIVELOOKAHEAD:
|
---|
| 371 | ret = fOpFactory.createLookOp(Op::O_NEGATIVELOOKAHEAD, next, result);
|
---|
| 372 | break;
|
---|
| 373 | case Token::T_LOOKBEHIND:
|
---|
| 374 | ret = fOpFactory.createLookOp(Op::O_LOOKBEHIND, next, result);
|
---|
| 375 | break;
|
---|
| 376 | case Token::T_NEGATIVELOOKBEHIND:
|
---|
| 377 | ret = fOpFactory.createLookOp(Op::O_NEGATIVELOOKBEHIND, next, result);
|
---|
| 378 | break;
|
---|
| 379 | case Token::T_INDEPENDENT:
|
---|
| 380 | ret = fOpFactory.createIndependentOp(next, result);
|
---|
| 381 | break;
|
---|
| 382 | case Token::T_MODIFIERGROUP:
|
---|
| 383 | ret = fOpFactory.createModifierOp(next, result,
|
---|
| 384 | ((ModifierToken *) token)->getOptions(),
|
---|
| 385 | ((ModifierToken *) token)->getOptionsMask());
|
---|
| 386 | break;
|
---|
| 387 | }
|
---|
| 388 |
|
---|
| 389 |
|
---|
| 390 | return ret;
|
---|
| 391 | }
|
---|
| 392 |
|
---|
| 393 | inline Op* RegularExpression::compileSingle(const Token* const token,
|
---|
| 394 | Op* const next,
|
---|
| 395 | const unsigned short tokType) {
|
---|
| 396 |
|
---|
| 397 | Op* ret = 0;
|
---|
| 398 |
|
---|
| 399 | switch (tokType) {
|
---|
| 400 | case Token::T_DOT:
|
---|
| 401 | ret = fOpFactory.createDotOp();
|
---|
| 402 | break;
|
---|
| 403 | case Token::T_CHAR:
|
---|
| 404 | ret = fOpFactory.createCharOp(token->getChar());
|
---|
| 405 | break;
|
---|
| 406 | case Token::T_ANCHOR:
|
---|
| 407 | ret = fOpFactory.createAnchorOp(token->getChar());
|
---|
| 408 | break;
|
---|
| 409 | case Token::T_RANGE:
|
---|
| 410 | case Token::T_NRANGE:
|
---|
| 411 | ret = fOpFactory.createRangeOp(token);
|
---|
| 412 | break;
|
---|
| 413 | case Token::T_EMPTY:
|
---|
| 414 | ret = next;
|
---|
| 415 | break;
|
---|
| 416 | case Token::T_STRING:
|
---|
| 417 | ret = fOpFactory.createStringOp(token->getString());
|
---|
| 418 | break;
|
---|
| 419 | case Token::T_BACKREFERENCE:
|
---|
| 420 | ret = fOpFactory.createBackReferenceOp(token->getReferenceNo());
|
---|
| 421 | break;
|
---|
| 422 | }
|
---|
| 423 |
|
---|
| 424 | if (tokType != Token::T_EMPTY)
|
---|
| 425 | ret->setNextOp(next);
|
---|
| 426 |
|
---|
| 427 | return ret;
|
---|
| 428 | }
|
---|
| 429 |
|
---|
| 430 |
|
---|
| 431 | inline Op* RegularExpression::compileUnion(const Token* const token,
|
---|
| 432 | Op* const next,
|
---|
| 433 | const bool reverse) {
|
---|
| 434 |
|
---|
| 435 | int tokSize = token->size();
|
---|
| 436 | UnionOp* uniOp = fOpFactory.createUnionOp(tokSize);
|
---|
| 437 |
|
---|
| 438 | for (int i=0; i<tokSize; i++) {
|
---|
| 439 |
|
---|
| 440 | uniOp->addElement(compile(token->getChild(i), next, reverse));
|
---|
| 441 | }
|
---|
| 442 |
|
---|
| 443 | return uniOp;
|
---|
| 444 | }
|
---|
| 445 |
|
---|
| 446 |
|
---|
| 447 | inline Op* RegularExpression::compileCondition(const Token* const token,
|
---|
| 448 | Op* const next,
|
---|
| 449 | const bool reverse) {
|
---|
| 450 |
|
---|
| 451 | Token* condTok = ((ConditionToken*) token)->getConditionToken();
|
---|
| 452 | Token* yesTok = token->getChild(0);
|
---|
| 453 | Token* noTok = token->getChild(1);
|
---|
| 454 | int refNo = token->getReferenceNo();
|
---|
| 455 | Op* condOp = (condTok == 0) ? 0 : compile(condTok, 0, reverse);
|
---|
| 456 | Op* yesOp = compile(yesTok, next, reverse);
|
---|
| 457 | Op* noOp = (noTok == 0) ? 0 : compile(noTok, next, reverse);
|
---|
| 458 |
|
---|
| 459 | return fOpFactory.createConditionOp(next, refNo, condOp, yesOp, noOp);
|
---|
| 460 | }
|
---|
| 461 |
|
---|
| 462 |
|
---|
| 463 | inline Op* RegularExpression::compileParenthesis(const Token* const token,
|
---|
| 464 | Op* const next,
|
---|
| 465 | const bool reverse) {
|
---|
| 466 |
|
---|
| 467 | if (token->getNoParen() == 0)
|
---|
| 468 | return compile(token->getChild(0), next, reverse);
|
---|
| 469 |
|
---|
| 470 | Op* captureOp = 0;
|
---|
| 471 |
|
---|
| 472 | if (reverse) {
|
---|
| 473 |
|
---|
| 474 | captureOp = fOpFactory.createCaptureOp(token->getNoParen(), next);
|
---|
| 475 | captureOp = compile(token->getChild(0), captureOp, reverse);
|
---|
| 476 |
|
---|
| 477 | return fOpFactory.createCaptureOp(-token->getNoParen(), captureOp);
|
---|
| 478 | }
|
---|
| 479 |
|
---|
| 480 | captureOp = fOpFactory.createCaptureOp(-token->getNoParen(), next);
|
---|
| 481 | captureOp = compile(token->getChild(0), captureOp, reverse);
|
---|
| 482 |
|
---|
| 483 | return fOpFactory.createCaptureOp(token->getNoParen(), captureOp);
|
---|
| 484 | }
|
---|
| 485 |
|
---|
| 486 | inline Op* RegularExpression::compileConcat(const Token* const token,
|
---|
| 487 | Op* const next,
|
---|
| 488 | const bool reverse) {
|
---|
| 489 |
|
---|
| 490 | Op* ret = next;
|
---|
| 491 | int tokSize = token->size();
|
---|
| 492 |
|
---|
| 493 | if (!reverse) {
|
---|
| 494 |
|
---|
| 495 | for (int i= tokSize - 1; i>=0; i--) {
|
---|
| 496 | ret = compile(token->getChild(i), ret, false);
|
---|
| 497 | }
|
---|
| 498 | }
|
---|
| 499 | else {
|
---|
| 500 |
|
---|
| 501 | for (int i= 0; i< tokSize; i++) {
|
---|
| 502 | ret = compile(token->getChild(i), ret, true);
|
---|
| 503 | }
|
---|
| 504 | }
|
---|
| 505 |
|
---|
| 506 | return ret;
|
---|
| 507 | }
|
---|
| 508 |
|
---|
| 509 | inline Op* RegularExpression::compileClosure(const Token* const token,
|
---|
| 510 | Op* const next,
|
---|
| 511 | const bool reverse,
|
---|
| 512 | const unsigned short tokType) {
|
---|
| 513 |
|
---|
| 514 | Op* ret = 0;
|
---|
| 515 | Token* childTok = token->getChild(0);
|
---|
| 516 | int min = token->getMin();
|
---|
| 517 | int max = token->getMax();
|
---|
| 518 |
|
---|
| 519 | if (min >= 0 && min == max) {
|
---|
| 520 |
|
---|
| 521 | ret = next;
|
---|
| 522 | for (int i=0; i< min; i++) {
|
---|
| 523 | ret = compile(childTok, ret, reverse);
|
---|
| 524 | }
|
---|
| 525 |
|
---|
| 526 | return ret;
|
---|
| 527 | }
|
---|
| 528 |
|
---|
| 529 | if (min > 0 && max > 0)
|
---|
| 530 | max -= min;
|
---|
| 531 |
|
---|
| 532 | if (max > 0) {
|
---|
| 533 |
|
---|
| 534 | ret = next;
|
---|
| 535 | for (int i=0; i<max; i++) {
|
---|
| 536 |
|
---|
| 537 | ChildOp* childOp = fOpFactory.createQuestionOp(
|
---|
| 538 | tokType == Token::T_NONGREEDYCLOSURE);
|
---|
| 539 |
|
---|
| 540 | childOp->setNextOp(next);
|
---|
| 541 | childOp->setChild(compile(childTok, ret, reverse));
|
---|
| 542 | ret = childOp;
|
---|
| 543 | }
|
---|
| 544 | }
|
---|
| 545 | else {
|
---|
| 546 |
|
---|
| 547 | ChildOp* childOp = 0;
|
---|
| 548 |
|
---|
| 549 | if (tokType == Token::T_NONGREEDYCLOSURE) {
|
---|
| 550 | childOp = fOpFactory.createNonGreedyClosureOp();
|
---|
| 551 | }
|
---|
| 552 | else {
|
---|
| 553 |
|
---|
| 554 | if (childTok->getMinLength() == 0)
|
---|
| 555 | childOp = fOpFactory.createClosureOp(fNoClosures++);
|
---|
| 556 | else
|
---|
| 557 | childOp = fOpFactory.createClosureOp(-1);
|
---|
| 558 | }
|
---|
| 559 |
|
---|
| 560 | childOp->setNextOp(next);
|
---|
| 561 | childOp->setChild(compile(childTok, childOp, reverse));
|
---|
| 562 | ret = childOp;
|
---|
| 563 | }
|
---|
| 564 |
|
---|
| 565 | if (min > 0) {
|
---|
| 566 |
|
---|
| 567 | for (int i=0; i< min; i++) {
|
---|
| 568 | ret = compile(childTok, ret, reverse);
|
---|
| 569 | }
|
---|
| 570 | }
|
---|
| 571 |
|
---|
| 572 | return ret;
|
---|
| 573 | }
|
---|
| 574 |
|
---|
| 575 | inline int RegularExpression::matchUnion(Context* const context,
|
---|
| 576 | const Op* const op, int offset,
|
---|
| 577 | const short direction)
|
---|
| 578 | {
|
---|
| 579 | unsigned int opSize = op->getSize();
|
---|
| 580 | int ret = -1;
|
---|
| 581 |
|
---|
| 582 | for (unsigned int i=0; i < opSize; i++) {
|
---|
| 583 |
|
---|
| 584 | ret = match(context, op->elementAt(i), offset, direction);
|
---|
| 585 |
|
---|
| 586 | if (ret == context->fLimit)
|
---|
| 587 | return ret;
|
---|
| 588 | }
|
---|
| 589 |
|
---|
| 590 | return -1;
|
---|
| 591 | }
|
---|
| 592 |
|
---|
| 593 | inline int RegularExpression::matchModifier(Context* const context,
|
---|
| 594 | const Op* const op, int offset,
|
---|
| 595 | const short direction)
|
---|
| 596 | {
|
---|
| 597 | int saveOptions = fOptions;
|
---|
| 598 | fOptions |= (int) op->getData();
|
---|
| 599 | fOptions &= (int) ~op->getData2();
|
---|
| 600 |
|
---|
| 601 | int ret = match(context, op->getChild(), offset, direction);
|
---|
| 602 |
|
---|
| 603 | fOptions = saveOptions;
|
---|
| 604 |
|
---|
| 605 | return ret;
|
---|
| 606 | }
|
---|
| 607 |
|
---|
| 608 | inline unsigned short RegularExpression::getWordType(const XMLCh* const target
|
---|
| 609 | , const int begin
|
---|
| 610 | , const int end
|
---|
| 611 | , const int offset)
|
---|
| 612 | {
|
---|
| 613 | if (offset < begin || offset >= end)
|
---|
| 614 | return WT_OTHER;
|
---|
| 615 |
|
---|
| 616 | return getCharType(target[offset]);
|
---|
| 617 | }
|
---|
| 618 |
|
---|
| 619 | inline
|
---|
| 620 | unsigned short RegularExpression::getPreviousWordType(const XMLCh* const target
|
---|
| 621 | , const int start
|
---|
| 622 | , const int end
|
---|
| 623 | , int offset)
|
---|
| 624 | {
|
---|
| 625 | unsigned short ret = getWordType(target, start, end, --offset);
|
---|
| 626 |
|
---|
| 627 | while (ret == WT_IGNORE) {
|
---|
| 628 | ret = getWordType(target, start, end, --offset);
|
---|
| 629 | }
|
---|
| 630 |
|
---|
| 631 | return ret;
|
---|
| 632 | }
|
---|
| 633 |
|
---|
| 634 | inline bool RegularExpression::matchIgnoreCase(const XMLInt32 ch1,
|
---|
| 635 | const XMLInt32 ch2)
|
---|
| 636 | {
|
---|
| 637 |
|
---|
| 638 | return (0==XMLString::compareNIString((XMLCh*)&ch1,(XMLCh*)&ch2, 1));
|
---|
| 639 | }
|
---|
| 640 |
|
---|
| 641 |
|
---|
| 642 | XERCES_CPP_NAMESPACE_END
|
---|
| 643 |
|
---|
| 644 | #endif
|
---|
| 645 | /**
|
---|
| 646 | * End of file RegularExpression.hpp
|
---|
| 647 | */
|
---|
| 648 |
|
---|