http://xml.apache.org/http://www.apache.org/http://www.w3.org/

Home

Readme
Release Info

Installation
Download
Build

FAQs
Samples
API Docs

DOM C++ Binding
Programming
Migration Guide

Feedback
Bug-Reporting
PDF Document

CVS Repository
Mail Archive

API Docs for SAX and DOM
 

Main Page   Class Hierarchy   Alphabetical List   Compound List   File List   Compound Members   File Members  

XMLChar.hpp

Go to the documentation of this file.
00001 /*
00002  * Copyright 2002,2004 The Apache Software Foundation.
00003  * 
00004  * Licensed under the Apache License, Version 2.0 (the "License");
00005  * you may not use this file except in compliance with the License.
00006  * You may obtain a copy of the License at
00007  * 
00008  *      http://www.apache.org/licenses/LICENSE-2.0
00009  * 
00010  * Unless required by applicable law or agreed to in writing, software
00011  * distributed under the License is distributed on an "AS IS" BASIS,
00012  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
00013  * See the License for the specific language governing permissions and
00014  * limitations under the License.
00015  */
00016 
00017 /*
00018  * $Log: XMLChar.hpp,v $
00019  * Revision 1.5  2004/09/08 13:56:24  peiyongz
00020  * Apache License Version 2.0
00021  *
00022  * Revision 1.4  2004/08/11 16:08:04  peiyongz
00023  * isValidNmToken
00024  *
00025  * Revision 1.3  2004/01/29 11:48:47  cargilld
00026  * Code cleanup changes to get rid of various compiler diagnostic messages.
00027  *
00028  * Revision 1.2  2003/08/14 02:57:27  knoaman
00029  * Code refactoring to improve performance of validation.
00030  *
00031  * Revision 1.1  2002/12/20 22:10:21  tng
00032  * XML 1.1
00033  *
00034  */
00035 
00036 #if !defined(XMLCHAR_HPP)
00037 #define XMLCHAR_HPP
00038 
00039 #include <xercesc/util/XMLUniDefs.hpp>
00040 
00041 XERCES_CPP_NAMESPACE_BEGIN
00042 
00043 // ---------------------------------------------------------------------------
00044 //  This file defines character classification utilities that conform to XML 1.0 and XML 1.1
00045 // ---------------------------------------------------------------------------
00046 // Masks for the fgCharCharsTable1_0 array
00047 const XMLByte   gLetterCharMask             = 0x1;
00048 const XMLByte   gFirstNameCharMask          = 0x2;
00049 const XMLByte   gNameCharMask               = 0x4;
00050 const XMLByte   gPlainContentCharMask       = 0x8;
00051 const XMLByte   gSpecialStartTagCharMask    = 0x10;
00052 const XMLByte   gControlCharMask            = 0x20;
00053 const XMLByte   gXMLCharMask                = 0x40;
00054 const XMLByte   gWhitespaceCharMask         = 0x80;
00055 
00056 // ---------------------------------------------------------------------------
00057 //  This class is for XML 1.0
00058 // ---------------------------------------------------------------------------
00059 class  XMLChar1_0
00060 {
00061 public:
00062     // -----------------------------------------------------------------------
00063     //  Public, static methods, check the string
00064     // -----------------------------------------------------------------------
00065     static bool isAllSpaces
00066     (
00067         const   XMLCh* const    toCheck
00068         , const unsigned int    count
00069     );
00070 
00071     static bool containsWhiteSpace
00072     (
00073         const   XMLCh* const    toCheck
00074         , const unsigned int    count
00075     );
00076 
00077     static bool isValidNmtoken
00078     (
00079         const   XMLCh*        const    toCheck
00080       , const   unsigned int           count
00081     );
00082 
00083     static bool isValidName
00084     (
00085         const   XMLCh* const    toCheck
00086         , const unsigned int    count
00087     );
00088 
00089     static bool isValidNCName
00090     (
00091         const   XMLCh* const    toCheck
00092         , const unsigned int    count
00093     );
00094 
00095     static bool isValidQName
00096     (
00097         const   XMLCh* const    toCheck
00098         , const unsigned int    count
00099     );
00100 
00101     // -----------------------------------------------------------------------
00102     //  Public, static methods, check the XMLCh
00103     //  surrogate pair is assumed if second parameter is not null
00104     // -----------------------------------------------------------------------
00105     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00106     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00107     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00108     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00109     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00110     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00111     static bool isWhitespace(const XMLCh toCheck);
00112     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
00113     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00114 
00115     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00116 
00117     // -----------------------------------------------------------------------
00118     //  Special Non-conformant Public, static methods
00119     // -----------------------------------------------------------------------
00123     static bool isNELRecognized();
00124 
00128     static void enableNELWS();
00129 
00130 private:
00131     // -----------------------------------------------------------------------
00132     //  Unimplemented constructors and operators
00133     // -----------------------------------------------------------------------
00134     XMLChar1_0();
00135 
00136     // -----------------------------------------------------------------------
00137     //  Static data members
00138     //
00139     //  fgCharCharsTable1_0
00140     //      The character characteristics table. Bits in each byte, represent
00141     //      the characteristics of each character. It is generated via some
00142     //      code and then hard coded into the cpp file for speed.
00143     //
00144     //  fNEL
00145     //      Flag to respresents whether NEL and LSEP newline recognition is enabled
00146     //      or disabled
00147     // -----------------------------------------------------------------------
00148     static XMLByte  fgCharCharsTable1_0[0x10000];
00149     static bool     enableNEL;
00150 
00151     friend class XMLReader;
00152 };
00153 
00154 
00155 // ---------------------------------------------------------------------------
00156 //  XMLChar1_0: Public, static methods
00157 // ---------------------------------------------------------------------------
00158 inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00159 {
00160     if (!toCheck2)
00161         return ((fgCharCharsTable1_0[toCheck] & gLetterCharMask) != 0);
00162     return false;
00163 }
00164 
00165 inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00166 {
00167     if (!toCheck2)
00168         return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
00169     return false;
00170 }
00171 
00172 inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00173 {
00174     if (!toCheck2)
00175         return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
00176     return false;
00177 }
00178 
00179 inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00180 {
00181     if (!toCheck2)
00182         return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
00183     else {
00184         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00185            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00186                return true;
00187     }
00188     return false;
00189 }
00190 
00191 
00192 inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00193 {
00194     if (!toCheck2)
00195         return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
00196     return false;
00197 }
00198 
00199 inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00200 {
00201     if (!toCheck2)
00202         return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
00203     else {
00204         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00205            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00206                return true;
00207     }
00208     return false;
00209 }
00210 
00211 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
00212 {
00213     return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00214 }
00215 
00216 inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00217 {
00218     if (!toCheck2)
00219         return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
00220     return false;
00221 }
00222 
00223 inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00224 {
00225     if (!toCheck2)
00226         return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
00227     return false;
00228 }
00229 
00230 inline bool XMLChar1_0::isNELRecognized() {
00231 
00232     return enableNEL;
00233 }
00234 
00235 
00236 // ---------------------------------------------------------------------------
00237 //  This class is for XML 1.1
00238 // ---------------------------------------------------------------------------
00239 class  XMLChar1_1
00240 {
00241 public:
00242     // -----------------------------------------------------------------------
00243     //  Public, static methods, check the string
00244     // -----------------------------------------------------------------------
00245     static bool isAllSpaces
00246     (
00247         const   XMLCh* const    toCheck
00248         , const unsigned int    count
00249     );
00250 
00251     static bool containsWhiteSpace
00252     (
00253         const   XMLCh* const    toCheck
00254         , const unsigned int    count
00255     );
00256 
00257     static bool isValidNmtoken
00258     (
00259         const   XMLCh*        const    toCheck
00260       , const   unsigned int           count
00261     );
00262 
00263     static bool isValidName
00264     (
00265         const   XMLCh* const    toCheck
00266         , const unsigned int    count
00267     );
00268 
00269     static bool isValidNCName
00270     (
00271         const   XMLCh* const    toCheck
00272         , const unsigned int    count
00273     );
00274 
00275     static bool isValidQName
00276     (
00277         const   XMLCh* const    toCheck
00278         , const unsigned int    count
00279     );
00280 
00281     // -----------------------------------------------------------------------
00282     //  Public, static methods, check the XMLCh
00283     // -----------------------------------------------------------------------
00284     static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00285     static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00286     static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00287     static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00288     static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00289     static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00290     static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00291     static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00292 
00293     static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
00294 
00295 private:
00296     // -----------------------------------------------------------------------
00297     //  Unimplemented constructors and operators
00298     // -----------------------------------------------------------------------
00299     XMLChar1_1();
00300 
00301     // -----------------------------------------------------------------------
00302     //  Static data members
00303     //
00304     //  fgCharCharsTable1_1
00305     //      The character characteristics table. Bits in each byte, represent
00306     //      the characteristics of each character. It is generated via some
00307     //      code and then hard coded into the cpp file for speed.
00308     //
00309     // -----------------------------------------------------------------------
00310     static XMLByte  fgCharCharsTable1_1[0x10000];
00311 
00312     friend class XMLReader;
00313 };
00314 
00315 
00316 // ---------------------------------------------------------------------------
00317 //  XMLChar1_1: Public, static methods
00318 // ---------------------------------------------------------------------------
00319 inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
00320 {
00321     if (!toCheck2)
00322         return ((fgCharCharsTable1_1[toCheck] & gLetterCharMask) != 0);
00323     return false;
00324 }
00325 
00326 inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00327 {
00328     if (!toCheck2)
00329         return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
00330     else {
00331         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00332            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00333                return true;
00334     }
00335     return false;
00336 }
00337 
00338 inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
00339 {
00340     if (!toCheck2)
00341         return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
00342     else {
00343         if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
00344            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00345                return true;
00346     }
00347     return false;
00348 }
00349 
00350 inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
00351 {
00352     if (!toCheck2)
00353         return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
00354     else {
00355         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00356            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00357                return true;
00358     }
00359     return false;
00360 }
00361 
00362 
00363 inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
00364 {
00365     if (!toCheck2)
00366         return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
00367     return false;
00368 }
00369 
00370 inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
00371 {
00372     if (!toCheck2)
00373         return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
00374     else {
00375         if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
00376            if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
00377                return true;
00378     }
00379     return false;
00380 }
00381 
00382 inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
00383 {
00384     if (!toCheck2)
00385         return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
00386     return false;
00387 }
00388 
00389 inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
00390 {
00391     if (!toCheck2)
00392         return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
00393     return false;
00394 }
00395 
00396 
00397 XERCES_CPP_NAMESPACE_END
00398 
00399 #endif


Copyright © 1994-2004 The Apache Software Foundation. All Rights Reserved.