source: NonGTP/Xerces/xerces-c_2_8_0/include/xercesc/framework/XMLRecognizer.hpp @ 2674

Revision 2674, 5.3 KB checked in by mattausch, 16 years ago (diff)
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 *  $Id: XMLRecognizer.hpp 568078 2007-08-21 11:43:25Z amassari $
20 */
21
22#if !defined(XMLRECOGNIZER_HPP)
23#define XMLRECOGNIZER_HPP
24
25#include <xercesc/util/XercesDefs.hpp>
26#include <xercesc/util/PlatformUtils.hpp>
27
28XERCES_CPP_NAMESPACE_BEGIN
29
30/**
31 *  This class provides some simple code to recognize the encodings of
32 *  XML files. This recognition only does very basic sensing of the encoding
33 *  in a broad sense. Basically its just enough to let us get started and
34 *  read the XMLDecl line. The scanner, once it reads the XMLDecl, will
35 *  tell the reader any actual encoding string it found and the reader can
36 *  update itself to be more specific at that point.
37 */
38class XMLPARSER_EXPORT XMLRecognizer
39{
40public :
41    // -----------------------------------------------------------------------
42    //  Class types
43    //
44    //  This enum represents the various encoding families that we have to
45    //  deal with individually at the scanner level. This does not indicate
46    //  the exact encoding, just the rough family that would let us scan
47    //  the XML/TextDecl to find the encoding string.
48    //
49    //  The 'L's and 'B's stand for little or big endian. We conditionally
50    //  create versions that will automatically map to the local UTF-16 and
51    //  UCS-4 endian modes.
52    //
53    //  OtherEncoding means that its some transcoder based encoding, i.e. not
54    //  one of the ones that we do internally. Its a special case and should
55    //  never be used directly outside of the reader.
56    //
57    //  NOTE: Keep this in sync with the name map array in the Cpp file!!
58    // -----------------------------------------------------------------------
59    enum Encodings
60    {
61        EBCDIC          = 0
62        , UCS_4B        = 1
63        , UCS_4L        = 2
64        , US_ASCII      = 3
65        , UTF_8         = 4
66        , UTF_16B       = 5
67        , UTF_16L       = 6
68        , XERCES_XMLCH  = 7
69
70        , Encodings_Count
71        , Encodings_Min = EBCDIC
72        , Encodings_Max = XERCES_XMLCH
73
74        , OtherEncoding = 999
75
76        #if defined(ENDIANMODE_BIG)
77        , Def_UTF16     = UTF_16B
78        , Def_UCS4      = UCS_4B
79        #else
80        , Def_UTF16     = UTF_16L
81        , Def_UCS4      = UCS_4L
82        #endif
83    };
84
85
86    // -----------------------------------------------------------------------
87    //  Public, const static data
88    //
89    //  These are the byte sequences for each of the encodings that we can
90    //  auto sense, and their lengths.
91    // -----------------------------------------------------------------------
92    static const char           fgASCIIPre[];
93    static const unsigned int   fgASCIIPreLen;
94    static const XMLByte        fgEBCDICPre[];
95    static const unsigned int   fgEBCDICPreLen;
96    static const XMLByte        fgUTF16BPre[];
97    static const XMLByte        fgUTF16LPre[];
98    static const unsigned int   fgUTF16PreLen;
99    static const XMLByte        fgUCS4BPre[];
100    static const XMLByte        fgUCS4LPre[];
101    static const unsigned int   fgUCS4PreLen;
102    static const char           fgUTF8BOM[];
103    static const unsigned int   fgUTF8BOMLen;
104
105
106    // -----------------------------------------------------------------------
107    //  Encoding recognition methods
108    // -----------------------------------------------------------------------
109    static Encodings basicEncodingProbe
110    (
111        const   XMLByte* const      rawBuffer
112        , const unsigned int        rawByteCount
113    );
114
115    static Encodings encodingForName
116    (
117        const   XMLCh* const    theEncName
118    );
119
120    static const XMLCh* nameForEncoding(const Encodings theEncoding
121        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
122
123
124protected :
125    // -----------------------------------------------------------------------
126    //  Unimplemented constructors, operators, and destructor
127    //
128    //  This class is effectively being used as a namespace for some static
129    //  methods.
130    //
131    //   (these functions are protected rather than private only to get rid of
132    //    some annoying compiler warnings.)
133    //
134    // -----------------------------------------------------------------------
135    XMLRecognizer();
136    ~XMLRecognizer();
137
138private:
139    // -----------------------------------------------------------------------
140    //  Unimplemented constructors and operators
141    // -----------------------------------------------------------------------
142    XMLRecognizer(const XMLRecognizer&);   
143    XMLRecognizer& operator=(const XMLRecognizer&);
144};
145
146XERCES_CPP_NAMESPACE_END
147
148#endif
Note: See TracBrowser for help on using the repository browser.