source: NonGTP/Xerces/xerces/include/xercesc/util/TransService.hpp @ 358

Revision 358, 20.5 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2001,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Log: TransService.hpp,v $
19 * Revision 1.14  2004/09/08 13:56:23  peiyongz
20 * Apache License Version 2.0
21 *
22 * Revision 1.13  2004/05/19 20:51:20  peiyongz
23 * XML1.0-3rd Edition: UTF_8
24 *
25 * Revision 1.12  2004/01/29 11:48:46  cargilld
26 * Code cleanup changes to get rid of various compiler diagnostic messages.
27 *
28 * Revision 1.11  2003/12/24 15:24:13  cargilld
29 * More updates to memory management so that the static memory manager.
30 *
31 * Revision 1.10  2003/11/24 19:52:06  neilg
32 * allow classes derived from XMLTransService to tailor the intrinsic maps to their taste.
33 *
34 * Revision 1.9  2003/06/03 18:12:29  knoaman
35 * Add default value for memory manager argument.
36 *
37 * Revision 1.8  2003/05/15 19:07:45  knoaman
38 * Partial implementation of the configurable memory manager.
39 *
40 * Revision 1.7  2003/03/07 18:11:55  tng
41 * Return a reference instead of void for operator=
42 *
43 * Revision 1.6  2003/02/04 22:11:52  peiyongz
44 * bug#16784: Obsolete documentation on XMLTranscoder -- reported by
45 * Colin Paul Adams, Preston Lancashire
46 *
47 * Revision 1.5  2002/11/25 21:27:52  tng
48 * Performance: use XMLRecognizer::Encodings enum to make new transcode, faster than comparing the encoding string every time.
49 *
50 * Revision 1.4  2002/11/04 15:22:04  tng
51 * C++ Namespace Support.
52 *
53 * Revision 1.3  2002/07/18 20:05:31  knoaman
54 * Add a new feature to control strict IANA encoding name.
55 *
56 * Revision 1.2  2002/04/09 15:44:00  knoaman
57 * Add lower case string support.
58 *
59 * Revision 1.1.1.1  2002/02/01 22:22:13  peiyongz
60 * sane_include
61 *
62 * Revision 1.14  2001/11/01 23:37:07  jasons
63 * 2001-11-01  Jason E. Stewart  <jason@openinformatics.com>
64 *
65 *      * src/util/TransService.hpp (Repository):
66 *      Updated Doxygen documentation for XMLTranscoder class
67 *
68 * Revision 1.13  2001/05/11 13:26:30  tng
69 * Copyright update.
70 *
71 * Revision 1.12  2001/01/25 19:19:32  tng
72 * Let user add their encoding to the intrinsic mapping table.  Added by Khaled Noaman.
73 *
74 * Revision 1.11  2000/04/12 22:57:45  roddey
75 * A couple of fixes to comments and parameter names to make them
76 * more correct.
77 *
78 * Revision 1.10  2000/03/28 19:43:19  roddey
79 * Fixes for signed/unsigned warnings. New work for two way transcoding
80 * stuff.
81 *
82 * Revision 1.9  2000/03/17 23:59:54  roddey
83 * Initial updates for two way transcoding support
84 *
85 * Revision 1.8  2000/03/02 19:54:46  roddey
86 * This checkin includes many changes done while waiting for the
87 * 1.1.0 code to be finished. I can't list them all here, but a list is
88 * available elsewhere.
89 *
90 * Revision 1.7  2000/02/24 20:05:25  abagchi
91 * Swat for removing Log from API docs
92 *
93 * Revision 1.6  2000/02/06 07:48:04  rahulj
94 * Year 2K copyright swat.
95 *
96 * Revision 1.5  2000/01/25 22:49:55  roddey
97 * Moved the supportsSrcOfs() method from the individual transcoder to the
98 * transcoding service, where it should have been to begin with.
99 *
100 * Revision 1.4  2000/01/25 19:19:07  roddey
101 * Simple addition of a getId() method to the xcode and netacess abstractions to
102 * allow each impl to give back an id string.
103 *
104 * Revision 1.3  1999/12/18 00:18:10  roddey
105 * More changes to support the new, completely orthagonal support for
106 * intrinsic encodings.
107 *
108 * Revision 1.2  1999/12/15 19:41:28  roddey
109 * Support for the new transcoder system, where even intrinsic encodings are
110 * done via the same transcoder abstraction as external ones.
111 *
112 * Revision 1.1.1.1  1999/11/09 01:05:16  twl
113 * Initial checkin
114 *
115 * Revision 1.2  1999/11/08 20:45:16  rahul
116 * Swat for adding in Product name and CVS comment log variable.
117 *
118 */
119
120#ifndef TRANSSERVICE_HPP
121#define TRANSSERVICE_HPP
122
123#include <xercesc/util/XMemory.hpp>
124#include <xercesc/util/PlatformUtils.hpp>
125#include <xercesc/framework/XMLRecognizer.hpp>
126#include <xercesc/util/RefHashTableOf.hpp>
127#include <xercesc/util/RefVectorOf.hpp>
128
129XERCES_CPP_NAMESPACE_BEGIN
130
131// Forward references
132//class XMLPlatformUtils;
133class XMLLCPTranscoder;
134class XMLTranscoder;
135class ENameMap;
136
137
138//
139//  This class is an abstract base class which is used to abstract the
140//  transcoding services that Xerces uses. The parser's actual transcoding
141//  needs are small so it is desirable to allow different implementations
142//  to be provided.
143//
144//  The transcoding service has to provide a couple of required string
145//  and character operations, but its most important service is the creation
146//  of transcoder objects. There are two types of transcoders, which are
147//  discussed below in the XMLTranscoder class' description.
148//
149class XMLUTIL_EXPORT XMLTransService : public XMemory
150{
151public :
152    // -----------------------------------------------------------------------
153    //  Class specific types
    //
    //  Codes
    //      Status values. The makeNewTranscoderFor() overloads and
    //      makeNewXMLTranscoder() take a Codes reference (resValue);
    //      presumably it receives the outcome of the request - confirm
    //      against the implementation.
    //
    //  TransRec
    //      Pairs one internal character (intCh, an XMLCh) with one external
    //      byte (extCh, an XMLByte).
154    // -----------------------------------------------------------------------
155    enum Codes
156    {
157        Ok
158        , UnsupportedEncoding
159        , InternalFailure
160        , SupportFilesNotFound
161    };
162
163    struct TransRec
164    {
165        XMLCh       intCh;
166        XMLByte     extCh;
167    };
168
169
170    // -----------------------------------------------------------------------
171    //  Public constructors and destructor
172    // -----------------------------------------------------------------------
173    virtual ~XMLTransService();
174
175
176    // -----------------------------------------------------------------------
177    //  Non-virtual API
    //
    //  Three overloads of makeNewTranscoderFor() that differ only in how the
    //  encoding is identified: by XMLCh name, by char name, or by an
    //  XMLRecognizer::Encodings enum value. The memory manager argument
    //  defaults to XMLPlatformUtils::fgMemoryManager.
    //  NOTE(review): ownership of the returned XMLTranscoder is not stated
    //  in this header - presumably the caller owns it; confirm.
178    // -----------------------------------------------------------------------
179    XMLTranscoder* makeNewTranscoderFor
180    (
181        const   XMLCh* const            encodingName
182        ,       XMLTransService::Codes& resValue
183        , const unsigned int            blockSize
184        , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
185    );
186
187    XMLTranscoder* makeNewTranscoderFor
188    (
189        const   char* const             encodingName
190        ,       XMLTransService::Codes& resValue
191        , const unsigned int            blockSize
192        , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
193    );
194
195    XMLTranscoder* makeNewTranscoderFor
196    (
197        XMLRecognizer::Encodings        encodingEnum
198        ,       XMLTransService::Codes& resValue
199        , const unsigned int            blockSize
200        , MemoryManager* const          manager = XMLPlatformUtils::fgMemoryManager
201    );
202
203
204    // -----------------------------------------------------------------------
205    //  The virtual transcoding service API
    //
    //  Every method in this section is pure virtual, so each concrete
    //  transcoding service has to supply its own implementation.
206    // -----------------------------------------------------------------------
207    virtual int compareIString
208    (
209        const   XMLCh* const    comp1
210        , const XMLCh* const    comp2
211    ) = 0;
212
213    virtual int compareNIString
214    (
215        const   XMLCh* const    comp1
216        , const XMLCh* const    comp2
217        , const unsigned int    maxChars
218    ) = 0;
219
220    virtual const XMLCh* getId() const = 0;
221
222    virtual bool isSpace(const XMLCh toCheck) const = 0;
223
224    virtual XMLLCPTranscoder* makeNewLCPTranscoder() = 0;
225
226    virtual bool supportsSrcOfs() const = 0;
227
228    virtual void upperCase(XMLCh* const toUpperCase) const = 0;
229    virtual void lowerCase(XMLCh* const toLowerCase) const = 0;
230
231        // -----------------------------------------------------------------------
232    //  Allow users to add their own encodings to the intrinsic mapping
233        //      table
234        //      Usage:
235        //              XMLTransService::addEncoding (
236        //                      gMyEncodingNameString
237    //                  , new ENameMapFor<MyTransClassType>(gMyEncodingNameString)
238        //              );
    //  NOTE(review): the parameter name ownMapping and the usage above
    //  (passing a freshly new'ed object) suggest the mapping table takes
    //  ownership of the ENameMap - confirm in the implementation.
239    // -----------------------------------------------------------------------
240        static void addEncoding(const XMLCh* const encoding, ENameMap* const ownMapping);
241
242
243protected :
244    // -----------------------------------------------------------------------
245    //  Hidden constructors
246    // -----------------------------------------------------------------------
247    XMLTransService();
248
249
250    // -----------------------------------------------------------------------
251    //  Protected virtual methods.
    //
    //  Pure virtual factory taking the same encodingName / resValue /
    //  blockSize / manager arguments as the XMLCh-name overload of
    //  makeNewTranscoderFor(), but with no default for the manager.
252    // -----------------------------------------------------------------------
253    virtual XMLTranscoder* makeNewXMLTranscoder
254    (
255        const   XMLCh* const            encodingName
256        ,       XMLTransService::Codes& resValue
257        , const unsigned int            blockSize
258        , MemoryManager* const          manager
259    ) = 0;
260
261    // -----------------------------------------------------------------------
262    //  Protected init method for platform utils to call
    //
    //  XMLPlatformUtils is declared a friend so that it can reach the
    //  protected and private members of this class.
263    // -----------------------------------------------------------------------
264    friend class XMLPlatformUtils;
265    virtual void initTransService();
266
267    // -----------------------------------------------------------------------
268    // protected static members
269    //  gMappings
270    //      This is a hash table of ENameMap objects. It is created and filled
271    //      in when the platform init calls our initTransService() method.
272    //
273    //  gMappingsRecognizer
274    //      This is an array of ENameMap objects, predefined for those
275    //      already recognized by XMLRecognizer::Encodings.
276    //
277
278    static RefHashTableOf<ENameMap>*    gMappings;
279    static RefVectorOf<ENameMap>*       gMappingsRecognizer;
280
281private :
282    // -----------------------------------------------------------------------
283    //  Unimplemented constructors and operators
    //
    //  Declared private so that instances cannot be copied or assigned.
284    // -----------------------------------------------------------------------
285    XMLTransService(const XMLTransService&);
286    XMLTransService& operator=(const XMLTransService&);
287
288    // -----------------------------------------------------------------------
289    //  Hidden method to enable/disable strict IANA encoding check
290    //  Caller: XMLPlatformUtils
    //
    //  NOTE(review): reinitMappings() and reinitMappingsRecognizer() are
    //  not described in this header; presumably they reset gMappings and
    //  gMappingsRecognizer respectively - confirm in the implementation.
291    // -----------------------------------------------------------------------
292    void strictIANAEncoding(const bool newState);
293    bool isStrictIANAEncoding();
294    static void reinitMappings();
295    static void reinitMappingsRecognizer();
296
297};
298
299
300
301/**
302  * <code>DOMString</code> is the generic string class that stores all strings
303  * used in the DOM C++ API.
304  *
305  * Though this class supports most of the common string operations to manipulate
306  * strings, it is not meant to be a comprehensive string class.
307  */
308
309/**
310  *   <code>XMLTranscoder</code> is for transcoding non-local code
311  *   page encodings, i.e.  named encodings. These are used internally
312  *   by the scanner to internalize raw XML into the internal Unicode
313  *   format, and by writer classes to convert that internal Unicode
314  *   format (which comes out of the parser) back out to a format that
315  *   the receiving client code wants to use.
316  */
317class XMLUTIL_EXPORT XMLTranscoder : public XMemory
318{
319public :
320
321        /**
322         * This enum is used by the <code>transcodeTo()</code> method
323         * to indicate how to react to unrepresentable characters. The
324         * <code>transcodeFrom()</code> method always works the
325         * same. It will consider any invalid data to be an error and
326         * throw.
327         */
328    enum UnRepOpts
329    {
330        UnRep_Throw             /**< Throw an exception */
331        , UnRep_RepChar         /**< Use the replacement char */
332    };
333
334
335        /** @name Destructor. */
336        //@{
337
338         /**
339          * Destructor for XMLTranscoder
340          *
341          */
342    virtual ~XMLTranscoder();
343        //@}
344
345
346
347    /** @name The virtual transcoding interface */
348    //@{
349
350    /** Converts from the encoding of the service to the internal XMLCh* encoding
351      *
352      * @param srcData the source buffer to be transcoded
353      * @param srcCount number of bytes in the source buffer
354      * @param toFill the destination buffer
355      * @param maxChars the max number of characters in the destination buffer
356      * @param bytesEaten after transcoding, this will hold the number of bytes
357      *    that were processed from the source buffer
358      * @param charSizes an array which must be at least as big as maxChars
359      *    into which will be inserted values that indicate how many
360      *    bytes from the input went into each XMLCh that was created
361      *    into toFill. Since many encodings use variable numbers of
362      *    bytes per character, this provides a means to find out what
363      *    bytes in the input went into making a particular output
364      *    UTF-16 character.
365      * @return Returns the number of chars put into the target buffer
366      */
367
368
369    virtual unsigned int transcodeFrom
370    (
371        const   XMLByte* const          srcData
372        , const unsigned int            srcCount
373        ,       XMLCh* const            toFill
374        , const unsigned int            maxChars
375        ,       unsigned int&           bytesEaten
376        ,       unsigned char* const    charSizes
377    ) = 0;
378
379    /** Converts from the internal XMLCh* encoding to the encoding of the service
380      *
381      * @param srcData    the source buffer to be transcoded
382      * @param srcCount   number of characters in the source buffer
383      * @param toFill     the destination buffer
384      * @param maxBytes   the max number of bytes in the destination buffer
385      * @param charsEaten after transcoding, this will hold the number of chars
386      *    that were processed from the source buffer
387      * @param options    options to pass to the transcoder that explain how to
388      *    respond to an unrepresentable character
389      * @return Returns the number of bytes put into the target buffer
390      */
391
392    virtual unsigned int transcodeTo
393    (
394        const   XMLCh* const    srcData
395        , const unsigned int    srcCount
396        ,       XMLByte* const  toFill
397        , const unsigned int    maxBytes
398        ,       unsigned int&   charsEaten
399        , const UnRepOpts       options
400    ) = 0;
401
402    /** Query whether the transcoder can handle a given character
403      *
404      * @param toCheck   the character code point to check
      * @return true if the transcoder can handle the character, false
      *    otherwise
405      */
406
407    virtual bool canTranscodeTo
408    (
409        const   unsigned int    toCheck
410    )   const = 0;
411
412    //@}
413
414    /** @name Getter methods */
415    //@{
416
417    /** Get the internal block size
418      *
419      * @return The block size indicated in the constructor.
420      */
421    unsigned int getBlockSize() const;
422
423    /** Get the encoding name
424      *
425      * @return the name of the encoding that this
426      *    <code>XMLTranscoder</code> object is for
427      */
428    const XMLCh* getEncodingName() const;
429        //@}
430
431    /** @name Getter methods */
432    //@{
433
434    /** Get the plugged-in memory manager
435      *
436      * This method returns the plugged-in memory manager used for dynamic
437      * memory allocation/deallocation.
438      *
439      * @return the plugged-in memory manager
440      */
441    MemoryManager* getMemoryManager() const;
442
443        //@}
444
445protected :
446    // -----------------------------------------------------------------------
447    //  Hidden constructors
    //
    //  Protected, so the class can only be instantiated through a derived
    //  class (it also has pure virtual methods). The memory manager
    //  argument defaults to XMLPlatformUtils::fgMemoryManager.
448    // -----------------------------------------------------------------------
449    XMLTranscoder
450    (
451        const   XMLCh* const    encodingName
452        , const unsigned int    blockSize
453        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
454    );
455
456
457    // -----------------------------------------------------------------------
458    //  Protected helper methods
459    // -----------------------------------------------------------------------
460    // As the body of this function is commented out it could be removed.
461    // However, currently all calls to it are guarded by #if defined(XERCES_DEBUG)
462    // so will leave it for now.
463    void checkBlockSize(const unsigned int toCheck);
464
465
466private :
467    // -----------------------------------------------------------------------
468    //  Unimplemented constructors and operators
    //
    //  Declared private so that instances cannot be copied or assigned.
469    // -----------------------------------------------------------------------
470    XMLTranscoder(const XMLTranscoder&);
471    XMLTranscoder& operator=(const XMLTranscoder&);
472
473    // -----------------------------------------------------------------------
474    //  Private data members
475    //
476    //  fBlockSize
477    //      This is the block size indicated in the constructor.
478    //
479    //  fEncodingName
480    //      This is the name of the encoding this encoder is for. All basic
481    //      XML transcoder's are for named encodings.
    //
    //  fMemoryManager
    //      This is the memory manager handed back by getMemoryManager();
    //      presumably the one passed to the constructor - confirm in the
    //      implementation.
482    // -----------------------------------------------------------------------
483    unsigned int    fBlockSize;
484    XMLCh*          fEncodingName;
485    MemoryManager*  fMemoryManager;
486};
487
488
489//
490//  This class is a specialized transcoder that only transcodes between
491//  the internal XMLCh format and the local code page. It is specialized
492//  for the very common job of translating data from the client app's
493//  native code page to the internal format and vice versa.
494//
495class XMLUTIL_EXPORT XMLLCPTranscoder : public XMemory
496{
497public :
498    // -----------------------------------------------------------------------
499    //  Public constructors and destructor
500    // -----------------------------------------------------------------------
501    virtual ~XMLLCPTranscoder();
502
503
504    // -----------------------------------------------------------------------
505    //  The virtual transcoder API
506    //
507    //  NOTE:   All these APIs don't include null terminator characters in
508    //          their parameters. So calcRequiredSize() returns the number
509    //          of actual chars, not including the null. maxBytes and maxChars
510    //          parameters refer to actual chars, not including the null so
511    //          it's assumed that the buffer is physically one char or byte
512    //          larger.
    //
    //  Every method in this section is pure virtual. The transcode()
    //  overloads come in two shapes: those returning a char* / XMLCh*
    //  result, and those filling a caller-supplied buffer (toFill) and
    //  returning bool.
    //  NOTE(review): the pointer-returning overloads presumably hand back
    //  a newly allocated string that the caller must release (via the
    //  given manager where one is passed) - confirm in the implementations.
513    // -----------------------------------------------------------------------
514    virtual unsigned int calcRequiredSize(const char* const srcText
515        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
516
517    virtual unsigned int calcRequiredSize(const XMLCh* const srcText
518        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager) = 0;
519
520    virtual char* transcode(const XMLCh* const toTranscode) = 0;
521    virtual char* transcode(const XMLCh* const toTranscode,
522                            MemoryManager* const manager) = 0;
523
524    virtual XMLCh* transcode(const char* const toTranscode) = 0;
525    virtual XMLCh* transcode(const char* const toTranscode,
526                             MemoryManager* const manager) = 0;
527
528    virtual bool transcode
529    (
530        const   char* const     toTranscode
531        ,       XMLCh* const    toFill
532        , const unsigned int    maxChars
533        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
534    ) = 0;
535
536    virtual bool transcode
537    (
538        const   XMLCh* const    toTranscode
539        ,       char* const     toFill
540        , const unsigned int    maxBytes
541        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
542    ) = 0;
543
544
545protected :
546    // -----------------------------------------------------------------------
547    //  Hidden constructors
    //
    //  Protected, so the class can only be instantiated through a derived
    //  class.
548    // -----------------------------------------------------------------------
549    XMLLCPTranscoder();
550
551
552private :
553    // -----------------------------------------------------------------------
554    //  Unimplemented constructors and operators
    //
    //  Declared private so that instances cannot be copied or assigned.
555    // -----------------------------------------------------------------------
556    XMLLCPTranscoder(const XMLLCPTranscoder&);
557    XMLLCPTranscoder& operator=(const XMLLCPTranscoder&);
558};
559
560
561// ---------------------------------------------------------------------------
562//  XMLTranscoder: Getter methods
563// ---------------------------------------------------------------------------
// Inline accessor: returns the fMemoryManager pointer member unchanged.
564inline MemoryManager* XMLTranscoder::getMemoryManager() const
565{
566    return fMemoryManager;
567}
568
569// ---------------------------------------------------------------------------
570//  XMLTranscoder: Getter methods
571// ---------------------------------------------------------------------------
// Inline accessor: returns the fBlockSize member by value.
572inline unsigned int XMLTranscoder::getBlockSize() const
573{
574    return fBlockSize;
575}
576
// Inline accessor: returns the fEncodingName pointer as a pointer-to-const;
// the string is not copied, so the result refers to this object's own member.
577inline const XMLCh* XMLTranscoder::getEncodingName() const
578{
579    return fEncodingName;
580}
581
582XERCES_CPP_NAMESPACE_END
583
584#endif
Note: See TracBrowser for help on using the repository browser.