source: NonGTP/Xerces/xercesc/framework/XMLRecognizer.hpp @ 188

Revision 188, 7.3 KB checked in by mattausch, 20 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 *  $Id: XMLRecognizer.hpp,v 1.6 2004/01/29 11:46:29 cargilld Exp $
59 */
60
61#if !defined(XMLRECOGNIZER_HPP)
62#define XMLRECOGNIZER_HPP
63
64#include <xercesc/util/XercesDefs.hpp>
65#include <xercesc/util/PlatformUtils.hpp>
66
67XERCES_CPP_NAMESPACE_BEGIN
68
69/**
70 *  This class provides some simple code to recognize the encodings of
71 *  XML files. This recognition only does very basic sensing of the encoding
72 *  in a broad sense. Basically it's just enough to let us get started and
73 *  read the XMLDecl line. The scanner, once it reads the XMLDecl, will
74 *  tell the reader any actual encoding string it found and the reader can
75 *  update itself to be more specific at that point.
76 */
77class XMLPARSER_EXPORT XMLRecognizer
78{
    //  All data members and methods declared here are static; the class
    //  serves as a namespace and is not meant to be instantiated (see the
    //  unimplemented constructors at the bottom of the class).
79public :
80    // -----------------------------------------------------------------------
81    //  Class types
82    //
83    //  This enum represents the various encoding families that we have to
84    //  deal with individually at the scanner level. This does not indicate
85    //  the exact encoding, just the rough family that would let us scan
86    //  the XML/TextDecl to find the encoding string.
87    //
88    //  The 'L's and 'B's stand for little or big endian. We conditionally
89    //  create versions (Def_UTF16 and Def_UCS4) that automatically map to the
90    //  local UTF-16 and UCS-4 endian modes, selected by ENDIANMODE_BIG.
91    //
92    //  OtherEncoding means that it's some transcoder based encoding, i.e. not
93    //  one of the ones that we do internally. It's a special case and should
94    //  never be used directly outside of the reader.
95    //
96    //  NOTE: Keep this in sync with the name map array in the Cpp file!!
97    // -----------------------------------------------------------------------
98    enum Encodings
99    {
100        EBCDIC          = 0
101        , UCS_4B        = 1
102        , UCS_4L        = 2
103        , US_ASCII      = 3
104        , UTF_8         = 4
105        , UTF_16B       = 5
106        , UTF_16L       = 6
107        , XERCES_XMLCH  = 7
108
109        , Encodings_Count               // no initializer: previous value + 1, i.e. 8
110        , Encodings_Min = EBCDIC        // alias for the first family (0)
111        , Encodings_Max = XERCES_XMLCH  // alias for the last family (7)
112
113        , OtherEncoding = 999           // lies outside Encodings_Min..Encodings_Max
114
115        #if defined(ENDIANMODE_BIG)
116        , Def_UTF16     = UTF_16B       // big endian build: defaults are the 'B' variants
117        , Def_UCS4      = UCS_4B
118        #else
119        , Def_UTF16     = UTF_16L       // otherwise: defaults are the 'L' variants
120        , Def_UCS4      = UCS_4L
121        #endif
122    };
123
124
125    // -----------------------------------------------------------------------
126    //  Public, const static data
127    //
128    //  These are the byte sequences for each of the encodings that we can
129    //  auto sense, and their lengths.
    //
    //  Only declarations appear here; the values are defined outside this
    //  header. One length constant is declared per big/little endian pair
    //  (fgUTF16PreLen, fgUCS4PreLen). Note that the ASCII prefix and the
    //  UTF-8 BOM are declared as char arrays, the others as XMLByte arrays.
130    // -----------------------------------------------------------------------
131    static const char           fgASCIIPre[];
132    static const unsigned int   fgASCIIPreLen;
133    static const XMLByte        fgEBCDICPre[];
134    static const unsigned int   fgEBCDICPreLen;
135    static const XMLByte        fgUTF16BPre[];
136    static const XMLByte        fgUTF16LPre[];
137    static const unsigned int   fgUTF16PreLen;
138    static const XMLByte        fgUCS4BPre[];
139    static const XMLByte        fgUCS4LPre[];
140    static const unsigned int   fgUCS4PreLen;
141    static const char           fgUTF8BOM[];
142    static const unsigned int   fgUTF8BOMLen;
143
144
145    // -----------------------------------------------------------------------
146    //  Encoding recognition methods
    //
    //  basicEncodingProbe: takes a pointer to raw bytes (rawBuffer) and the
    //  number of bytes available (rawByteCount) and returns a member of
    //  Encodings. The matching logic is not visible in this header;
    //  presumably it compares the start of the buffer against the prefix
    //  sequences declared above -- confirm against the Cpp file.
    //
    //  encodingForName: maps an encoding name string to a member of
    //  Encodings. NOTE(review): the result for an unrecognized name is not
    //  visible here (presumably OtherEncoding) -- confirm.
    //
    //  nameForEncoding: the reverse mapping, from an Encodings value to a
    //  name string. The manager argument defaults to
    //  XMLPlatformUtils::fgMemoryManager; how it is used, and the result for
    //  values such as OtherEncoding, cannot be determined from this header.
147    // -----------------------------------------------------------------------
148    static Encodings basicEncodingProbe
149    (
150        const   XMLByte* const      rawBuffer
151        , const unsigned int        rawByteCount
152    );
153
154    static Encodings encodingForName
155    (
156        const   XMLCh* const    theEncName
157    );
158
159    static const XMLCh* nameForEncoding(const Encodings theEncoding
160        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
161
162
163protected :
164    // -----------------------------------------------------------------------
165    //  Unimplemented constructors, operators, and destructor
166    //
167    //  This class is effectively being used as a namespace for some static
168    //  methods, so no instance is ever expected to be created or destroyed.
169    //
170    //   (these functions are protected rather than private only to get rid of
171    //    some annoying compiler warnings.)
172    //
173    // -----------------------------------------------------------------------
174    XMLRecognizer();
175    ~XMLRecognizer();
176
177private:
178    // -----------------------------------------------------------------------
179    //  Unimplemented constructors and operators (declared only, to block copying)
180    // -----------------------------------------------------------------------
181    XMLRecognizer(const XMLRecognizer&);   
182    XMLRecognizer& operator=(const XMLRecognizer&);
183};
184
185XERCES_CPP_NAMESPACE_END
186
187#endif
Note: See TracBrowser for help on using the repository browser.