source: NonGTP/Xerces/xerces-c_2_8_0/include/xercesc/util/XMLChar.hpp @ 2674

Revision 2674, 14.4 KB checked in by mattausch, 16 years ago (diff)
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XMLChar.hpp 568078 2007-08-21 11:43:25Z amassari $
20 */
21
22#if !defined(XMLCHAR_HPP)
23#define XMLCHAR_HPP
24
25#include <xercesc/util/XMLUniDefs.hpp>
26
27XERCES_CPP_NAMESPACE_BEGIN
28
29// ---------------------------------------------------------------------------
30//  This file defines Char and utility that conforms to XML 1.0 and XML 1.1
31// ---------------------------------------------------------------------------
32// Masks for the fgCharCharsTable1_0 array
33const XMLByte   gNCNameCharMask             = 0x1;
34const XMLByte   gFirstNameCharMask          = 0x2;
35const XMLByte   gNameCharMask               = 0x4;
36const XMLByte   gPlainContentCharMask       = 0x8;
37const XMLByte   gSpecialStartTagCharMask    = 0x10;
38const XMLByte   gControlCharMask            = 0x20;
39const XMLByte   gXMLCharMask                = 0x40;
40const XMLByte   gWhitespaceCharMask         = 0x80;
41
42// ---------------------------------------------------------------------------
43//  This class is for XML 1.0
44// ---------------------------------------------------------------------------
45class XMLUTIL_EXPORT XMLChar1_0
46{
47public:
48    // -----------------------------------------------------------------------
49    //  Public, static methods, check the string
50    // -----------------------------------------------------------------------
51    static bool isAllSpaces
52    (
53        const   XMLCh* const    toCheck
54        , const unsigned int    count
55    );
56
57    static bool containsWhiteSpace
58    (
59        const   XMLCh* const    toCheck
60        , const unsigned int    count
61    );
62
63    static bool isValidNmtoken
64    (
65        const   XMLCh*        const    toCheck
66      , const   unsigned int           count
67    );
68
69    static bool isValidName
70    (
71        const   XMLCh* const    toCheck
72        , const unsigned int    count
73    );
74
75    static bool isValidName
76    (
77        const   XMLCh* const    toCheck
78    );
79
80    static bool isValidNCName
81    (
82        const   XMLCh* const    toCheck
83        , const unsigned int    count
84    );
85
86    static bool isValidQName
87    (
88        const   XMLCh* const    toCheck
89        , const unsigned int    count
90    );
91
92    // -----------------------------------------------------------------------
93    //  Public, static methods, check the XMLCh
94    //  surrogate pair is assumed if second parameter is not null
95    // -----------------------------------------------------------------------
96    static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
97    static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
98    static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
99    static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
100    static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
101    static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
102    static bool isWhitespace(const XMLCh toCheck);
103    static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2);
104    static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
105
106    static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
107    static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
108    static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
109
110    // -----------------------------------------------------------------------
111    //  Special Non-conformant Public, static methods
112    // -----------------------------------------------------------------------
113    /**
114      * Return true if NEL (0x85) and LSEP (0x2028) to be treated as white space char.
115      */
116    static bool isNELRecognized();
117
118    /**
119      * Method to enable NEL (0x85) and LSEP (0x2028) to be treated as white space char.
120      */
121    static void enableNELWS();
122
123private:
124    // -----------------------------------------------------------------------
125    //  Unimplemented constructors and operators
126    // -----------------------------------------------------------------------
127    XMLChar1_0();
128
129    // -----------------------------------------------------------------------
130    //  Static data members
131    //
132    //  fgCharCharsTable1_0
133    //      The character characteristics table. Bits in each byte, represent
134    //      the characteristics of each character. It is generated via some
135    //      code and then hard coded into the cpp file for speed.
136    //
137    //  fNEL
138    //      Flag to respresents whether NEL and LSEP newline recognition is enabled
139    //      or disabled
140    // -----------------------------------------------------------------------
141    static XMLByte  fgCharCharsTable1_0[0x10000];
142    static bool     enableNEL;
143
144    friend class XMLReader;
145};
146
147
148// ---------------------------------------------------------------------------
//  XMLChar1_0: Public, static methods
150// ---------------------------------------------------------------------------
151inline bool XMLChar1_0::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
152{
153    // An XML letter is a FirstNameChar minus ':' and '_'.
154    if (!toCheck2) {
155        return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0)
156                && (toCheck != chColon) && (toCheck != chUnderscore));
157    }
158    return false;
159}
160
161inline bool XMLChar1_0::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
162{
163    if (!toCheck2)
164        return ((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0);
165    return false;
166}
167
168inline bool XMLChar1_0::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
169{
170    if (!toCheck2) {
171        return (((fgCharCharsTable1_0[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
172    }
173
174    return false;
175}
176
177inline bool XMLChar1_0::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
178{
179    if (!toCheck2)
180        return ((fgCharCharsTable1_0[toCheck] & gNameCharMask) != 0);
181    return false;
182}
183
184inline bool XMLChar1_0::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
185{
186    if (!toCheck2)
187        return ((fgCharCharsTable1_0[toCheck] & gNCNameCharMask) != 0);
188    return false;
189}
190
191inline bool XMLChar1_0::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
192{
193    if (!toCheck2)
194        return ((fgCharCharsTable1_0[toCheck] & gPlainContentCharMask) != 0);
195    else {
196        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
197           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
198               return true;
199    }
200    return false;
201}
202
203
204inline bool XMLChar1_0::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
205{
206    if (!toCheck2)
207        return ((fgCharCharsTable1_0[toCheck] & gSpecialStartTagCharMask) != 0);
208    return false;
209}
210
211inline bool XMLChar1_0::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
212{
213    if (!toCheck2)
214        return ((fgCharCharsTable1_0[toCheck] & gXMLCharMask) != 0);
215    else {
216        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
217           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
218               return true;
219    }
220    return false;
221}
222
223inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck)
224{
225    return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
226}
227
228inline bool XMLChar1_0::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
229{
230    if (!toCheck2)
231        return ((fgCharCharsTable1_0[toCheck] & gWhitespaceCharMask) != 0);
232    return false;
233}
234
235inline bool XMLChar1_0::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
236{
237    if (!toCheck2)
238        return ((fgCharCharsTable1_0[toCheck] & gControlCharMask) != 0);
239    return false;
240}
241
242inline bool XMLChar1_0::isNELRecognized() {
243
244    return enableNEL;
245}
246
247
248// ---------------------------------------------------------------------------
249//  This class is for XML 1.1
250// ---------------------------------------------------------------------------
251class XMLUTIL_EXPORT XMLChar1_1
252{
253public:
254    // -----------------------------------------------------------------------
255    //  Public, static methods, check the string
256    // -----------------------------------------------------------------------
257    static bool isAllSpaces
258    (
259        const   XMLCh* const    toCheck
260        , const unsigned int    count
261    );
262
263    static bool containsWhiteSpace
264    (
265        const   XMLCh* const    toCheck
266        , const unsigned int    count
267    );
268
269    static bool isValidNmtoken
270    (
271        const   XMLCh*        const    toCheck
272      , const   unsigned int           count
273    );
274
275    static bool isValidName
276    (
277        const   XMLCh* const    toCheck
278        , const unsigned int    count
279    );
280
281    static bool isValidName
282    (
283        const   XMLCh* const    toCheck
284    );
285
286    static bool isValidNCName
287    (
288        const   XMLCh* const    toCheck
289        , const unsigned int    count
290    );
291
292    static bool isValidQName
293    (
294        const   XMLCh* const    toCheck
295        , const unsigned int    count
296    );
297
298    // -----------------------------------------------------------------------
299    //  Public, static methods, check the XMLCh
300    // -----------------------------------------------------------------------
301    static bool isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2 = 0);
302    static bool isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
303    static bool isNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
304    static bool isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
305    static bool isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
306    static bool isXMLChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
307    static bool isWhitespace(const XMLCh toCheck, const XMLCh toCheck2 = 0);
308    static bool isControlChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
309
310    static bool isPublicIdChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
311    static bool isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
312    static bool isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2 = 0);
313
314private:
315    // -----------------------------------------------------------------------
316    //  Unimplemented constructors and operators
317    // -----------------------------------------------------------------------
318    XMLChar1_1();
319
320    // -----------------------------------------------------------------------
321    //  Static data members
322    //
323    //  fgCharCharsTable1_1
324    //      The character characteristics table. Bits in each byte, represent
325    //      the characteristics of each character. It is generated via some
326    //      code and then hard coded into the cpp file for speed.
327    //
328    // -----------------------------------------------------------------------
329    static XMLByte  fgCharCharsTable1_1[0x10000];
330
331    friend class XMLReader;
332};
333
334
335// ---------------------------------------------------------------------------
//  XMLChar1_1: Public, static methods
337// ---------------------------------------------------------------------------
338inline bool XMLChar1_1::isXMLLetter(const XMLCh toCheck, const XMLCh toCheck2)
339{
340    /** XML 1.1 does not define a letter, so we use the 1.0 definition */
341    return XMLChar1_0::isXMLLetter(toCheck, toCheck2);
342}
343
344inline bool XMLChar1_1::isFirstNameChar(const XMLCh toCheck, const XMLCh toCheck2)
345{
346    if (!toCheck2)
347        return ((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0);
348    else {
349        if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
350           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
351               return true;
352    }
353    return false;
354}
355
356inline bool XMLChar1_1::isFirstNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
357{
358    if (!toCheck2) {
359        return (((fgCharCharsTable1_1[toCheck] & gFirstNameCharMask) != 0) && (toCheck != chColon));
360    }
361    else {
362        if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
363           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
364               return true;
365    }
366    return false;
367}
368
369inline bool XMLChar1_1::isNameChar(const XMLCh toCheck, const XMLCh toCheck2)
370{
371    if (!toCheck2)
372        return ((fgCharCharsTable1_1[toCheck] & gNameCharMask) != 0);
373    else {
374        if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
375           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
376               return true;
377    }
378    return false;
379}
380
381inline bool XMLChar1_1::isNCNameChar(const XMLCh toCheck, const XMLCh toCheck2)
382{
383    if (!toCheck2)
384        return ((fgCharCharsTable1_1[toCheck] & gNCNameCharMask) != 0);
385    else {
386        if ((toCheck >= 0xD800) && (toCheck <= 0xDB7F))
387           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
388               return true;
389    }
390    return false;
391}
392
393inline bool XMLChar1_1::isPlainContentChar(const XMLCh toCheck, const XMLCh toCheck2)
394{
395    if (!toCheck2)
396        return ((fgCharCharsTable1_1[toCheck] & gPlainContentCharMask) != 0);
397    else {
398        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
399           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
400               return true;
401    }
402    return false;
403}
404
405
406inline bool XMLChar1_1::isSpecialStartTagChar(const XMLCh toCheck, const XMLCh toCheck2)
407{
408    if (!toCheck2)
409        return ((fgCharCharsTable1_1[toCheck] & gSpecialStartTagCharMask) != 0);
410    return false;
411}
412
413inline bool XMLChar1_1::isXMLChar(const XMLCh toCheck, const XMLCh toCheck2)
414{
415    if (!toCheck2)
416        return ((fgCharCharsTable1_1[toCheck] & gXMLCharMask) != 0);
417    else {
418        if ((toCheck >= 0xD800) && (toCheck <= 0xDBFF))
419           if ((toCheck2 >= 0xDC00) && (toCheck2 <= 0xDFFF))
420               return true;
421    }
422    return false;
423}
424
425inline bool XMLChar1_1::isWhitespace(const XMLCh toCheck, const XMLCh toCheck2)
426{
427    if (!toCheck2)
428        return ((fgCharCharsTable1_1[toCheck] & gWhitespaceCharMask) != 0);
429    return false;
430}
431
432inline bool XMLChar1_1::isControlChar(const XMLCh toCheck, const XMLCh toCheck2)
433{
434    if (!toCheck2)
435        return ((fgCharCharsTable1_1[toCheck] & gControlCharMask) != 0);
436    return false;
437}
438
439
440XERCES_CPP_NAMESPACE_END
441
442#endif
Note: See TracBrowser for help on using the repository browser.