source: NonGTP/Xerces/xerces/include/xercesc/util/XMLChar.hpp @ 358

Revision 358, 13.2 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 2002,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Log: XMLChar.hpp,v $
19 * Revision 1.5  2004/09/08 13:56:24  peiyongz
20 * Apache License Version 2.0
21 *
22 * Revision 1.4  2004/08/11 16:08:04  peiyongz
23 * isValidNmToken
24 *
25 * Revision 1.3  2004/01/29 11:48:47  cargilld
26 * Code cleanup changes to get rid of various compiler diagnostic messages.
27 *
28 * Revision 1.2  2003/08/14 02:57:27  knoaman
29 * Code refactoring to improve performance of validation.
30 *
31 * Revision 1.1  2002/12/20 22:10:21  tng
32 * XML 1.1
33 *
34 */
35
36#if !defined(XMLCHAR_HPP)
37#define XMLCHAR_HPP
38
39#include <xercesc/util/XMLUniDefs.hpp>
40
41XERCES_CPP_NAMESPACE_BEGIN
42
43// ---------------------------------------------------------------------------
44//  This file defines Char and utility that conforms to XML 1.0 and XML 1.1
45// ---------------------------------------------------------------------------
46// Masks for the fgCharCharsTable1_0 array
47const XMLByte   gLetterCharMask             = 0x1;
48const XMLByte   gFirstNameCharMask          = 0x2;
49const XMLByte   gNameCharMask               = 0x4;
50const XMLByte   gPlainContentCharMask       = 0x8;
51const XMLByte   gSpecialStartTagCharMask    = 0x10;
52const XMLByte   gControlCharMask            = 0x20;
53const XMLByte   gXMLCharMask                = 0x40;
54const XMLByte   gWhitespaceCharMask         = 0x80;
55
56// ---------------------------------------------------------------------------
57//  This class is for XML 1.0
58// ---------------------------------------------------------------------------
59class XMLUTIL_EXPORT XMLChar1_0
60{
61public:
62    // -----------------------------------------------------------------------
63    //  Public, static methods, check the string
64    // -----------------------------------------------------------------------
65    static bool isAllSpaces
66    (
67        const   XMLCh* const    toCheck
68        , const unsigned int    count
69    );
70
71    static bool containsWhiteSpace
72    (
73        const   XMLCh* const    toCheck
74        , const unsigned int    count
75    );
76
77    static bool isValidNmtoken
78    (
79        const   XMLCh*        const    toCheck
80      , const   unsigned int           count
81    );
82
83    static bool isValidName
84    (
85        const   XMLCh* const    toCheck
86        , const unsigned int    count
87    );
88
89    static bool isValidNCName
90    (
91        const   XMLCh* const    toCheck
92        , const unsigned int    count
93    );
94
95    static bool isValidQName
96    (
97        const   XMLCh* const    toCheck
98        , const unsigned int    count
99    );
100
101    // -----------------------------------------------------------------------
102    //  Public, static methods, check the XMLCh
103    //  surrogate pair is assumed if second parameter is not null
104    // -----------------------------------------------------------------------
105    static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
106    static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
107    static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
108    static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
109    static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
110    static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
111    static bool isWhitespace(const XMLCh toCheck);
112    static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
113    static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
114
115    static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
116
117    // -----------------------------------------------------------------------
118    //  Special Non-conformant Public, static methods
119    // -----------------------------------------------------------------------
120    /**
121      * Return true if NEL (0x85) and LSEP (0x2028) to be treated as white space char.
122      */
123    static bool isNELRecognized();
124
125    /**
126      * Method to enable NEL (0x85) and LSEP (0x2028) to be treated as white space char.
127      */
128    static void enableNELWS();
129
130private:
131    // -----------------------------------------------------------------------
132    //  Unimplemented constructors and operators
133    // -----------------------------------------------------------------------
134    XMLChar1_0();
135
136    // -----------------------------------------------------------------------
137    //  Static data members
138    //
139    //  fgCharCharsTable1_0
140    //      The character characteristics table. Bits in each byte, represent
141    //      the characteristics of each character. It is generated via some
142    //      code and then hard coded into the cpp file for speed.
143    //
144    //  fNEL
145    //      Flag to respresents whether NEL and LSEP newline recognition is enabled
146    //      or disabled
147    // -----------------------------------------------------------------------
148    static XMLByte  fgCharCharsTable1_0[0x10000];
149    static bool     enableNEL;
150
151    friend class XMLReader;
152};
153
154
// ---------------------------------------------------------------------------
//  XMLChar1_0: Public, static methods
// ---------------------------------------------------------------------------
158inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
159{
160    if (!toCheck2)
161        return ((fgCharCharsTable1_0[toCheck] & gLetterCharMask) != 0);
162    return false;
163}
164
165inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
166{
167    if (!toCheck2)
168        return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
169    return false;
170}
171
172inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
173{
174    if (!toCheck2)
175        return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
176    return false;
177}
178
179inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
180{
181    if (!toCheck2)
182        return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
183    else {
184        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
185           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
186               return true;
187    }
188    return false;
189}
190
191
192inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
193{
194    if (!toCheck2)
195        return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
196    return false;
197}
198
199inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
200{
201    if (!toCheck2)
202        return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
203    else {
204        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
205           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
206               return true;
207    }
208    return false;
209}
210
211inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
212{
213    return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
214}
215
216inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
217{
218    if (!toCheck2)
219        return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
220    return false;
221}
222
223inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
224{
225    if (!toCheck2)
226        return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
227    return false;
228}
229
230inline bool XMLChar1_0::isNELRecognized() {
231
232    return enableNEL;
233}
234
235
236// ---------------------------------------------------------------------------
237//  This class is for XML 1.1
238// ---------------------------------------------------------------------------
239class XMLUTIL_EXPORT XMLChar1_1
240{
241public:
242    // -----------------------------------------------------------------------
243    //  Public, static methods, check the string
244    // -----------------------------------------------------------------------
245    static bool isAllSpaces
246    (
247        const   XMLCh* const    toCheck
248        , const unsigned int    count
249    );
250
251    static bool containsWhiteSpace
252    (
253        const   XMLCh* const    toCheck
254        , const unsigned int    count
255    );
256
257    static bool isValidNmtoken
258    (
259        const   XMLCh*        const    toCheck
260      , const   unsigned int           count
261    );
262
263    static bool isValidName
264    (
265        const   XMLCh* const    toCheck
266        , const unsigned int    count
267    );
268
269    static bool isValidNCName
270    (
271        const   XMLCh* const    toCheck
272        , const unsigned int    count
273    );
274
275    static bool isValidQName
276    (
277        const   XMLCh* const    toCheck
278        , const unsigned int    count
279    );
280
281    // -----------------------------------------------------------------------
282    //  Public, static methods, check the XMLCh
283    // -----------------------------------------------------------------------
284    static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
285    static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
286    static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
287    static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
288    static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
289    static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
290    static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
291    static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
292
293    static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
294
295private:
296    // -----------------------------------------------------------------------
297    //  Unimplemented constructors and operators
298    // -----------------------------------------------------------------------
299    XMLChar1_1();
300
301    // -----------------------------------------------------------------------
302    //  Static data members
303    //
304    //  fgCharCharsTable1_1
305    //      The character characteristics table. Bits in each byte, represent
306    //      the characteristics of each character. It is generated via some
307    //      code and then hard coded into the cpp file for speed.
308    //
309    // -----------------------------------------------------------------------
310    static XMLByte  fgCharCharsTable1_1[0x10000];
311
312    friend class XMLReader;
313};
314
315
// ---------------------------------------------------------------------------
//  XMLChar1_1: Public, static methods
// ---------------------------------------------------------------------------
319inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
320{
321    if (!toCheck2)
322        return ((fgCharCharsTable1_1[toCheck] & gLetterCharMask) != 0);
323    return false;
324}
325
326inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
327{
328    if (!toCheck2)
329        return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
330    else {
331        if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
332           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
333               return true;
334    }
335    return false;
336}
337
338inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
339{
340    if (!toCheck2)
341        return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
342    else {
343        if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
344           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
345               return true;
346    }
347    return false;
348}
349
350inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
351{
352    if (!toCheck2)
353        return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
354    else {
355        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
356           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
357               return true;
358    }
359    return false;
360}
361
362
363inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
364{
365    if (!toCheck2)
366        return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
367    return false;
368}
369
370inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
371{
372    if (!toCheck2)
373        return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
374    else {
375        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
376           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
377               return true;
378    }
379    return false;
380}
381
382inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
383{
384    if (!toCheck2)
385        return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
386    return false;
387}
388
389inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
390{
391    if (!toCheck2)
392        return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
393    return false;
394}
395
396
397XERCES_CPP_NAMESPACE_END
398
399#endif
Note: See TracBrowser for help on using the repository browser.