source: NonGTP/Xerces/xercesc/util/TransService.hpp @ 188

Revision 188, 22.4 KB checked in by mattausch, 19 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Log: TransService.hpp,v $
59 * Revision 1.12  2004/01/29 11:48:46  cargilld
60 * Code cleanup changes to get rid of various compiler diagnostic messages.
61 *
62 * Revision 1.11  2003/12/24 15:24:13  cargilld
63 * More updates to memory management so that the static memory manager.
64 *
65 * Revision 1.10  2003/11/24 19:52:06  neilg
66 * allow classes derived from XMLTransService to tailor the intrinsic maps to their taste.
67 *
68 * Revision 1.9  2003/06/03 18:12:29  knoaman
69 * Add default value for memory manager argument.
70 *
71 * Revision 1.8  2003/05/15 19:07:45  knoaman
72 * Partial implementation of the configurable memory manager.
73 *
74 * Revision 1.7  2003/03/07 18:11:55  tng
75 * Return a reference instead of void for operator=
76 *
77 * Revision 1.6  2003/02/04 22:11:52  peiyongz
78 * bug#16784: Obsolete documentation on XMLTranscoder -- reported by
79 * Colin Paul Adams, Preston Lancashire
80 *
81 * Revision 1.5  2002/11/25 21:27:52  tng
82 * Performance: use XMLRecognizer::Encodings enum to make new transcode, faster than comparing the encoding string every time.
83 *
84 * Revision 1.4  2002/11/04 15:22:04  tng
85 * C++ Namespace Support.
86 *
87 * Revision 1.3  2002/07/18 20:05:31  knoaman
88 * Add a new feature to control strict IANA encoding name.
89 *
90 * Revision 1.2  2002/04/09 15:44:00  knoaman
91 * Add lower case string support.
92 *
93 * Revision 1.1.1.1  2002/02/01 22:22:13  peiyongz
94 * sane_include
95 *
96 * Revision 1.14  2001/11/01 23:37:07  jasons
97 * 2001-11-01  Jason E. Stewart  <jason@openinformatics.com>
98 *
99 *      * src/util/TransService.hpp (Repository):
100 *      Updated Doxygen documentation for XMLTranscoder class
101 *
102 * Revision 1.13  2001/05/11 13:26:30  tng
103 * Copyright update.
104 *
105 * Revision 1.12  2001/01/25 19:19:32  tng
106 * Let user add their encoding to the intrinsic mapping table.  Added by Khaled Noaman.
107 *
108 * Revision 1.11  2000/04/12 22:57:45  roddey
109 * A couple of fixes to comments and parameter names to make them
110 * more correct.
111 *
112 * Revision 1.10  2000/03/28 19:43:19  roddey
113 * Fixes for signed/unsigned warnings. New work for two way transcoding
114 * stuff.
115 *
116 * Revision 1.9  2000/03/17 23:59:54  roddey
117 * Initial updates for two way transcoding support
118 *
119 * Revision 1.8  2000/03/02 19:54:46  roddey
120 * This checkin includes many changes done while waiting for the
121 * 1.1.0 code to be finished. I can't list them all here, but a list is
122 * available elsewhere.
123 *
124 * Revision 1.7  2000/02/24 20:05:25  abagchi
125 * Swat for removing Log from API docs
126 *
127 * Revision 1.6  2000/02/06 07:48:04  rahulj
128 * Year 2K copyright swat.
129 *
130 * Revision 1.5  2000/01/25 22:49:55  roddey
131 * Moved the supportsSrcOfs() method from the individual transcoder to the
132 * transcoding service, where it should have been to begin with.
133 *
134 * Revision 1.4  2000/01/25 19:19:07  roddey
135 * Simple addition of a getId() method to the xcode and netacess abstractions to
136 * allow each impl to give back an id string.
137 *
138 * Revision 1.3  1999/12/18 00:18:10  roddey
139 * More changes to support the new, completely orthagonal support for
140 * intrinsic encodings.
141 *
142 * Revision 1.2  1999/12/15 19:41:28  roddey
143 * Support for the new transcoder system, where even intrinsic encodings are
144 * done via the same transcoder abstraction as external ones.
145 *
146 * Revision 1.1.1.1  1999/11/09 01:05:16  twl
147 * Initial checkin
148 *
149 * Revision 1.2  1999/11/08 20:45:16  rahul
150 * Swat for adding in Product name and CVS comment log variable.
151 *
152 */
153
154#ifndef TRANSSERVICE_HPP
155#define TRANSSERVICE_HPP
156
157#include <xercesc/util/XMemory.hpp>
158#include <xercesc/util/PlatformUtils.hpp>
159#include <xercesc/framework/XMLRecognizer.hpp>
160#include <xercesc/util/RefHashTableOf.hpp>
161#include <xercesc/util/RefVectorOf.hpp>
162
163XERCES_CPP_NAMESPACE_BEGIN
164
165// Forward references
166//class XMLPlatformUtils;
167class XMLLCPTranscoder;
168class XMLTranscoder;
169class ENameMap;
170
171
172//
173//  This class is an abstract base class which is used to abstract the
174//  transcoding services that Xerces uses. The parser's actual transcoding
175//  needs are small so it is desirable to allow different implementations
176//  to be provided.
177//
178//  The transcoding service has to provide a couple of required string
179//  and character operations, but its most important service is the creation
180//  of transcoder objects. There are two types of transcoders, which are
181//  discussed below in the XMLTranscoder class' description.
182//
183class XMLUTIL_EXPORT XMLTransService : public XMemory
184{
185public :
186    // -----------------------------------------------------------------------
187    //  Class specific types
188    // -----------------------------------------------------------------------
189    enum Codes
190    {
191        Ok
192        , UnsupportedEncoding
193        , InternalFailure
194        , SupportFilesNotFound
195    };
196
197    struct TransRec
198    {
199        XMLCh       intCh;
200        XMLByte     extCh;
201    };
202
203
204    // -----------------------------------------------------------------------
205    //  Public constructors and destructor
206    // -----------------------------------------------------------------------
207    virtual ~XMLTransService();
208
209
210    // -----------------------------------------------------------------------
211    //  Non-virtual API
212    // -----------------------------------------------------------------------
213    XMLTranscoder* makeNewTranscoderFor
214    (
215        const   XMLCh* const            encodingName
216        ,       XMLTransService::Codes& resValue
217        , const unsigned int            blockSize
218        , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
219    );
220
221    XMLTranscoder* makeNewTranscoderFor
222    (
223        const   char* const             encodingName
224        ,       XMLTransService::Codes& resValue
225        , const unsigned int            blockSize
226        , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
227    );
228
229    XMLTranscoder* makeNewTranscoderFor
230    (
231        XMLRecognizer::Encodings        encodingEnum
232        ,       XMLTransService::Codes& resValue
233        , const unsigned int            blockSize
234        , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
235    );
236
237
238    // -----------------------------------------------------------------------
239    //  The virtual transcoding service API
240    // -----------------------------------------------------------------------
241    virtual int compareIString
242    (
243        const   XMLCh* const    comp1
244        , const XMLCh* const    comp2
245    ) = 0;
246
247    virtual int compareNIString
248    (
249        const   XMLCh* const    comp1
250        , const XMLCh* const    comp2
251        , const unsigned int    maxChars
252    ) = 0;
253
254    virtual const XMLCh* getId() const = 0;
255
256    virtual bool isSpace(const XMLCh toCheck) const = 0;
257
258    virtual XMLLCPTranscoder* makeNewLCPTranscoder() = 0;
259
260    virtual bool supportsSrcOfs() const = 0;
261
262    virtual void upperCase(XMLCh* const toUpperCase) const = 0;
263    virtual void lowerCase(XMLCh* const toLowerCase) const = 0;
264
265        // -----------------------------------------------------------------------
266    //  Allow users to add their own encodings to the intrinsinc mapping
267        //      table
268        //      Usage:
269        //              XMLTransService::addEncoding (
270        //                      gMyEncodingNameString
271    //                  , new ENameMapFor<MyTransClassType>(gMyEncodingNameString)
272        //              );
273    // -----------------------------------------------------------------------
274        static void addEncoding(const XMLCh* const encoding, ENameMap* const ownMapping);
275
276
277protected :
278    // -----------------------------------------------------------------------
279    //  Hidden constructors
280    // -----------------------------------------------------------------------
281    XMLTransService();
282
283
284    // -----------------------------------------------------------------------
285    //  Protected virtual methods.
286    // -----------------------------------------------------------------------
287    virtual XMLTranscoder* makeNewXMLTranscoder
288    (
289        const   XMLCh* const            encodingName
290        ,       XMLTransService::Codes& resValue
291        , const unsigned int            blockSize
292        , MemoryManager* const          manager
293    ) = 0;
294
295    // -----------------------------------------------------------------------
296    //  Protected init method for platform utils to call
297    // -----------------------------------------------------------------------
298    friend class XMLPlatformUtils;
299    virtual void initTransService();
300
301    // -----------------------------------------------------------------------
302    // protected static members
303    //  gMappings
304    //      This is a hash table of ENameMap objects. It is created and filled
305    //      in when the platform init calls our initTransService() method.
306    //
307    //  gMappingsRecognizer
308    //      This is an array of ENameMap objects, predefined for those
309    //      already recognized by XMLRecognizer::Encodings.
310    //
311
312    static RefHashTableOf<ENameMap>*    gMappings;
313    static RefVectorOf<ENameMap>*       gMappingsRecognizer;
314
315private :
316    // -----------------------------------------------------------------------
317    //  Unimplemented constructors and operators
318    // -----------------------------------------------------------------------
319    XMLTransService(const XMLTransService&);
320    XMLTransService& operator=(const XMLTransService&);
321
322    // -----------------------------------------------------------------------
323    //  Hidden method to enable/disable strict IANA encoding check
324    //  Caller: XMLPlatformUtils
325    // -----------------------------------------------------------------------
326    void strictIANAEncoding(const bool newState);
327    bool isStrictIANAEncoding();
328    static void reinitMappings();
329    static void reinitMappingsRecognizer();
330
331};
332
333
334
335/**
336  * <code>DOMString</code> is the generic string class that stores all strings
337  * used in the DOM C++ API.
338  *
339  * Though this class supports most of the common string operations to manipulate
340  * strings, it is not meant to be a comprehensive string class.
341  */
342
343/**
344  *   <code>XMLTranscoder</code> is for transcoding non-local code
345  *   page encodings, i.e.  named encodings. These are used internally
346  *   by the scanner to internalize raw XML into the internal Unicode
347  *   format, and by writer classes to convert that internal Unicode
348  *   format (which comes out of the parser) back out to a format that
349  *   the receiving client code wants to use.
350  */
351class XMLUTIL_EXPORT XMLTranscoder : public XMemory
352{
353public :
354
355        /**
356         * This enum is used by the <code>transcodeTo()</code> method
357         * to indicate how to react to unrepresentable characters. The
358         * <code>transcodeFrom()</code> method always works the
359         * same. It will consider any invalid data to be an error and
360         * throw.
361         */
362    enum UnRepOpts
363    {
364        UnRep_Throw             /**< Throw an exception */
365        , UnRep_RepChar         /**< Use the replacement char */
366    };
367
368
369        /** @name Destructor. */
370        //@{
371
372         /**
373          * Destructor for XMLTranscoder
374          *
375          */
376    virtual ~XMLTranscoder();
377        //@}
378
379
380
381    /** @name The virtual transcoding interface */
382    //@{
383
384    /** Converts from the encoding of the service to the internal XMLCh* encoding
385      *
386      * @param srcData the source buffer to be transcoded
387      * @param srcCount number of characters in the source buffer
388      * @param toFill the destination buffer
389      * @param maxChars the max number of characters in the destination buffer
390      * @param bytesEaten after transcoding, this will hold the number of bytes
391      *    that were processed from the source buffer
392      * @param charSizes an array which must be at least as big as maxChars
393      *    into which will be inserted values that indicate how many
394      *    bytes from the input went into each XMLCh that was created
395      *    into toFill. Since many encodings use variable numbers of
396      *    byte per character, this provides a means to find out what
397      *    bytes in the input went into making a particular output
398      *    UTF-16 character.
399      * @return Returns the number of chars put into the target buffer
400      */
401
402
403    virtual unsigned int transcodeFrom
404    (
405        const   XMLByte* const          srcData
406        , const unsigned int            srcCount
407        ,       XMLCh* const            toFill
408        , const unsigned int            maxChars
409        ,       unsigned int&           bytesEaten
410        ,       unsigned char* const    charSizes
411    ) = 0;
412
413    /** Converts from the internal XMLCh* encoding to the encoding of the service
414      *
415      * @param srcData    the source buffer to be transcoded
416      * @param srcCount   number of characters in the source buffer
417      * @param toFill     the destination buffer
418      * @param maxBytes   the max number of bytes in the destination buffer
419      * @param charsEaten after transcoding, this will hold the number of chars
420      *    that were processed from the source buffer
421      * @param options    options to pass to the transcoder that explain how to
422      *    respond to an unrepresentable character
423      * @return Returns the number of chars put into the target buffer
424      */
425
426    virtual unsigned int transcodeTo
427    (
428        const   XMLCh* const    srcData
429        , const unsigned int    srcCount
430        ,       XMLByte* const  toFill
431        , const unsigned int    maxBytes
432        ,       unsigned int&   charsEaten
433        , const UnRepOpts       options
434    ) = 0;
435
436    /** Query whether the transcoder can handle a given character
437      *
438      * @param toCheck   the character code point to check
439      */
440
441    virtual bool canTranscodeTo
442    (
443        const   unsigned int    toCheck
444    )   const = 0;
445
446    //@}
447
448    /** @name Getter methods */
449    //@{
450
451    /** Get the internal block size
452     *
453       * @return The block size indicated in the constructor.
454       */
455    unsigned int getBlockSize() const;
456
457    /** Get the encoding name
458      *
459      * @return the name of the encoding that this
460      *    <code>XMLTranscoder</code> object is for
461      */
462    const XMLCh* getEncodingName() const;
463        //@}
464
465    /** @name Getter methods*/
466    //@{
467
468    /** Get the plugged-in memory manager
469      *
470      * This method returns the plugged-in memory manager user for dynamic
471      * memory allocation/deallocation.
472      *
473      * @return the plugged-in memory manager
474      */
475    MemoryManager* getMemoryManager() const;
476
477        //@}
478
479protected :
480    // -----------------------------------------------------------------------
481    //  Hidden constructors
482    // -----------------------------------------------------------------------
483    XMLTranscoder
484    (
485        const   XMLCh* const    encodingName
486        , const unsigned int    blockSize
487        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
488    );
489
490
491    // -----------------------------------------------------------------------
492    //  Protected helper methods
493    // -----------------------------------------------------------------------
494    // As the body of this function is commented out it could be removed.
495    // However, currently all calls to it are guarded by #if defined(XERCES_DEBUG)
496    // so will leave it for now.
497    void checkBlockSize(const unsigned int toCheck);
498
499
500private :
501    // -----------------------------------------------------------------------
502    //  Unimplemented constructors and operators
503    // -----------------------------------------------------------------------
504    XMLTranscoder(const XMLTranscoder&);
505    XMLTranscoder& operator=(const XMLTranscoder&);
506
507    // -----------------------------------------------------------------------
508    //  Private data members
509    //
510    //  fBlockSize
511    //      This is the block size indicated in the constructor.
512    //
513    //  fEncodingName
514    //      This is the name of the encoding this encoder is for. All basic
515    //      XML transcoder's are for named encodings.
516    // -----------------------------------------------------------------------
517    unsigned int    fBlockSize;
518    XMLCh*          fEncodingName;
519    MemoryManager*  fMemoryManager;
520};
521
522
523//
524//  This class is a specialized transcoder that only transcodes between
525//  the internal XMLCh format and the local code page. It is specialized
526//  for the very common job of translating data from the client app's
527//  native code page to the internal format and vice versa.
528//
529class XMLUTIL_EXPORT XMLLCPTranscoder : public XMemory
530{
531public :
532    // -----------------------------------------------------------------------
533    //  Public constructors and destructor
534    // -----------------------------------------------------------------------
535    virtual ~XMLLCPTranscoder();
536
537
538    // -----------------------------------------------------------------------
539    //  The virtual transcoder API
540    //
541    //  NOTE:   All these APIs don't include null terminator characters in
542    //          their parameters. So calcRequiredSize() returns the number
543    //          of actual chars, not including the null. maxBytes and maxChars
544    //          parameters refer to actual chars, not including the null so
545    //          its assumed that the buffer is physically one char or byte
546    //          larger.
547    // -----------------------------------------------------------------------
548    virtual unsigned int calcRequiredSize(const char* const srcText
549        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
550
551    virtual unsigned int calcRequiredSize(const XMLCh* const srcText
552        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
553
554    virtual char* transcode(const XMLCh* const toTranscode) = 0;
555    virtual char* transcode(const XMLCh* const toTranscode,
556                            MemoryManager* const manager) = 0;
557
558    virtual XMLCh* transcode(const char* const toTranscode) = 0;
559    virtual XMLCh* transcode(const char* const toTranscode,
560                             MemoryManager* const manager) = 0;
561
562    virtual bool transcode
563    (
564        const   char* const     toTranscode
565        ,       XMLCh* const    toFill
566        , const unsigned int    maxChars
567        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
568    ) = 0;
569
570    virtual bool transcode
571    (
572        const   XMLCh* const    toTranscode
573        ,       char* const     toFill
574        , const unsigned int    maxBytes
575        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
576    ) = 0;
577
578
579protected :
580    // -----------------------------------------------------------------------
581    //  Hidden constructors
582    // -----------------------------------------------------------------------
583    XMLLCPTranscoder();
584
585
586private :
587    // -----------------------------------------------------------------------
588    //  Unimplemented constructors and operators
589    // -----------------------------------------------------------------------
590    XMLLCPTranscoder(const XMLLCPTranscoder&);
591    XMLLCPTranscoder& operator=(const XMLLCPTranscoder&);
592};
593
594
595// ---------------------------------------------------------------------------
596//  XMLTranscoder: Getter methods
597// ---------------------------------------------------------------------------
598inline MemoryManager* XMLTranscoder::getMemoryManager() const
599{
600    return fMemoryManager;
601}
602
603// ---------------------------------------------------------------------------
604//  XMLTranscoder: Getter methods
605// ---------------------------------------------------------------------------
606inline unsigned int XMLTranscoder::getBlockSize() const
607{
608    return fBlockSize;
609}
610
611inline const XMLCh* XMLTranscoder::getEncodingName() const
612{
613    return fEncodingName;
614}
615
616XERCES_CPP_NAMESPACE_END
617
618#endif
Note: See TracBrowser for help on using the repository browser.