source: NonGTP/Xerces/xercesc/framework/XMLFormatter.hpp @ 188

Revision 188, 21.2 KB checked in by mattausch, 19 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Log: XMLFormatter.hpp,v $
59 * Revision 1.20  2004/01/29 11:46:29  cargilld
60 * Code cleanup changes to get rid of various compiler diagnostic messages.
61 *
62 * Revision 1.19  2003/12/01 23:23:25  neilg
63 * fix for bug 25118; thanks to Jeroen Witmond
64 *
65 * Revision 1.18  2003/10/10 02:06:09  neilg
66 * fix for bug 21780; thanks to David Cargill
67 *
68 * Revision 1.17  2003/09/22 08:50:04  gareth
69 * doc fix
70 *
71 * Revision 1.16  2003/09/08 21:48:36  peiyongz
72 * Restore pre2.3 constructors
73 *
74 * Revision 1.15  2003/05/30 16:11:43  gareth
75 * Fixes so we compile under VC7.1. Patch by Alberto Massari.
76 *
77 * Revision 1.14  2003/05/16 21:36:55  knoaman
78 * Memory manager implementation: Modify constructors to pass in the memory manager.
79 *
80 * Revision 1.13  2003/05/15 18:26:07  knoaman
81 * Partial implementation of the configurable memory manager.
82 *
83 * Revision 1.12  2003/03/17 03:19:52  peiyongz
84 * Bug#18051 memory leakage in XMLFormatter
85 *
86 * Revision 1.11  2003/03/16 06:00:43  peiyongz
87 * Bug#17983 Formatter does not escape control characters
88 *
89 * Revision 1.10  2003/03/11 12:58:36  tng
90 * Fix compilation error on AIX.
91 *
92 * Revision 1.9  2003/03/07 21:42:37  tng
93 * [Bug 17589] Refactoring ... .  Patch from Jacques Legare.
94 *
95 * Revision 1.8  2003/03/07 18:08:10  tng
96 * Return a reference instead of void for operator=
97 *
98 * Revision 1.7  2003/01/31 00:30:48  jberry
99 * Syntax error in declaration
100 *
101 * Revision 1.6  2003/01/28 18:32:33  peiyongz
102 * Bug#13694: Allow Xerces to write the BOM to XML files
103 *
104 * Revision 1.5  2003/01/24 20:20:22  tng
105 * Add method flush to XMLFormatTarget
106 *
107 * Revision 1.4  2002/11/04 15:00:21  tng
108 * C++ Namespace Support.
109 *
110 * Revision 1.3  2002/07/30 16:29:16  tng
111 * [Bug 8550] No explanation of XMLFormatter escape options.
112 *
113 * Revision 1.2  2002/06/21 19:31:23  peiyongz
114 * getTranscoder() added;
115 *
116 * Revision 1.1.1.1  2002/02/01 22:21:52  peiyongz
117 * sane_include
118 *
119 * Revision 1.7  2000/10/17 19:25:38  andyh
120 * XMLFormatTarget, removed version of writeChars with no length.  Can not be
121 * safely used, and obscured other errors.
122 *
123 * Revision 1.6  2000/10/10 23:54:58  andyh
124 * XMLFormatter patch, contributed by Bill Schindler.  Fix problems with
125 * output to multi-byte encodings.
126 *
127 * Revision 1.5  2000/04/07 01:01:56  roddey
128 * Fixed an error message so that it indicated the correct radix for the rep
129 * token. Get all of the basic output formatting functionality in place for
130 * at least ICU and Win32 transcoders.
131 *
132 * Revision 1.4  2000/04/06 23:50:38  roddey
133 * Now the low level formatter handles doing char refs for
134 * unrepresentable chars (in addition to the replacement char style
135 * already done.)
136 *
137 * Revision 1.3  2000/04/06 19:09:21  roddey
138 * Some more improvements to output formatting. Now it will correctly
139 * handle doing the 'replacement char' style of dealing with chars
140 * that are unrepresentable.
141 *
142 * Revision 1.2  2000/04/05 00:20:16  roddey
143 * More updates for the low level formatted output support
144 *
145 * Revision 1.1  2000/03/28 19:43:17  roddey
146 * Fixes for signed/unsigned warnings. New work for two way transcoding
147 * stuff.
148 *
149 */
150
151#if !defined(XMLFORMATTER_HPP)
152#define XMLFORMATTER_HPP
153
154#include <xercesc/util/PlatformUtils.hpp>
155
156XERCES_CPP_NAMESPACE_BEGIN
157
158class XMLFormatTarget;
159class XMLTranscoder;
160
161/**
162 *  This class provides the basic formatting capabilities that are required
163 *  to turn the Unicode based XML data from the parsers into a form that can
164 *  be used on non-Unicode based systems, that is, into local or generic text
165 *  encodings.
166 *
167 *  A number of flags are provided to control whether various optional
168 *  formatting operations are performed.
169 */
170class XMLPARSER_EXPORT XMLFormatter : public XMemory
171{
172public:
173    // -----------------------------------------------------------------------
174    //  Class types
175    // -----------------------------------------------------------------------
176    /** @name Public Contants */
177    //@{
178    /**
179     * EscapeFlags - Different styles of escape flags to control various formatting.
180     *
181     * <p><code>NoEscapes:</code>
182     * No character needs to be escaped.   Just write them out as is.</p>
183     * <p><code>StdEscapes:</code>
184     * The following characters need to be escaped:</p>
185     * <table border='1'>
186     * <tr>
187     * <td>character</td>
188     * <td>should be escaped and written as</td>
189     * </tr>
190     * <tr>
191     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
192     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
193     * </tr>
194     * <tr>
195     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
196     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
197     * </tr>
198     * <tr>
199     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
200     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
201     * </tr>
202     * <tr>
203     * <td valign='top' rowspan='1' colspan='1'>&lt;</td>
204     * <td valign='top' rowspan='1' colspan='1'>&amp;lt;</td>
205     * </tr>
206     * <tr>
207     * <td valign='top' rowspan='1' colspan='1'>&apos;</td>
208     * <td valign='top' rowspan='1' colspan='1'>&amp;apos;</td>
209     * </tr>
210     * </table>
211     * <p><code>AttrEscapes:</code>
212     * The following characters need to be escaped:</p>
213     * <table border='1'>
214     * <tr>
215     * <td>character</td>
216     * <td>should be escaped and written as</td>
217     * </tr>
218     * <tr>
219     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
220     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
221     * </tr>
222     * <tr>
223     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
224     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
225     * </tr>
226     * <tr>
227     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
228     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
229     * </tr>
230     * </table>
231     * <p><code>CharEscapes:</code>
232     * The following characters need to be escaped:</p>
233     * <table border='1'>
234     * <tr>
235     * <td>character</td>
236     * <td>should be escaped and written as</td>
237     * </tr>
238     * <tr>
239     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
240     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
241     * </tr>
242     * <tr>
243     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
244     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
245     * </tr>
246     * </table>
247     * <p><code>EscapeFlags_Count:</code>
248     * Special value, do not use directly.</p>
249     * <p><code>DefaultEscape:</code>
250     * Special value, do not use directly.</p>
251     *
252     */
253    enum EscapeFlags
254    {
255        NoEscapes
256        , StdEscapes
257        , AttrEscapes
258        , CharEscapes
259
260        // Special values, don't use directly
261        , EscapeFlags_Count
262        , DefaultEscape     = 999
263    };
264
265    /**
266     * UnRepFlags
267     *
268     * The unrepresentable flags that indicate how to react when a
269     * character cannot be represented in the target encoding.
270     *
271     * <p><code>UnRep_Fail:</code>
272     * Fail the operation.</p>
273     * <p><code>UnRep_CharRef:</code>
274     * Display the unrepresented character as reference.</p>
275     * <p><code>UnRep_Replace:</code>
276     * Replace the unrepresented character with the replacement character.</p>
277     * <p><code>DefaultUnRep:</code>
278     * Special value, do not use directly.</p>
279     *
280     */
281    enum UnRepFlags
282    {
283        UnRep_Fail
284        , UnRep_CharRef
285        , UnRep_Replace
286
287        , DefaultUnRep      = 999
288    };
289    //@}
290
291
292    // -----------------------------------------------------------------------
293    //  Constructors and Destructor
294    // -----------------------------------------------------------------------
295    /** @name Constructor and Destructor */
296    //@{
297    /**
298     * @param outEncoding the encoding for the formatted content
299     * @param docVersion 
300     * @param target      the formatTarget where the formatted content is written to
301     * @param escapeFlags the escape style for certain character
302     * @param unrepFlags  the reaction to unrepresentable character
303     * @param manager     Pointer to the memory manager to be used to
304     *                    allocate objects.
305     */
306    XMLFormatter
307    (
308        const   XMLCh* const            outEncoding
309        , const XMLCh* const            docVersion
310        ,       XMLFormatTarget* const  target
311        , const EscapeFlags             escapeFlags = NoEscapes
312        , const UnRepFlags              unrepFlags = UnRep_Fail
313        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
314    );
315
316    XMLFormatter
317    (
318        const   char* const             outEncoding
319        , const char* const             docVersion
320        ,       XMLFormatTarget* const  target
321        , const EscapeFlags             escapeFlags = NoEscapes
322        , const UnRepFlags              unrepFlags = UnRep_Fail
323        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
324    );
325
326    XMLFormatter
327    (
328        const   XMLCh* const            outEncoding
329        ,       XMLFormatTarget* const  target
330        , const EscapeFlags             escapeFlags = NoEscapes
331        , const UnRepFlags              unrepFlags = UnRep_Fail
332        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
333    );
334
335    XMLFormatter
336    (
337        const   char* const             outEncoding
338        ,       XMLFormatTarget* const  target
339        , const EscapeFlags             escapeFlags = NoEscapes
340        , const UnRepFlags              unrepFlags = UnRep_Fail
341        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
342    );
343   
344    ~XMLFormatter();
345    //@}
346
347
348    // -----------------------------------------------------------------------
349    //  Formatting methods
350    // -----------------------------------------------------------------------
351    /** @name Formatting methods */
352    //@{
353    /**
354     * @param toFormat the string to be formatted
355     * @param count    length of the string
356     * @param escapeFlags the escape style for formatting toFormat
357     * @param unrepFlags the reaction for any unrepresentable character in toFormat
358     *
359     */
360    void formatBuf
361    (
362        const   XMLCh* const    toFormat
363        , const unsigned int    count
364        , const EscapeFlags     escapeFlags = DefaultEscape
365        , const UnRepFlags      unrepFlags = DefaultUnRep
366    );
367
368    /**
369     * @see formatBuf
370     */
371    XMLFormatter& operator<<
372    (
373        const   XMLCh* const    toFormat
374    );
375
376    XMLFormatter& operator<<
377    (
378        const   XMLCh           toFormat
379    );
380
381    void writeBOM(const XMLByte* const toFormat
382                , const unsigned int   count);
383
384    //@}
385
386    // -----------------------------------------------------------------------
387    //  Getter methods
388    // -----------------------------------------------------------------------
389    /** @name Getter methods */
390    //@{
391    /**
392     * @return return the encoding set for the formatted content
393     */
394
395    const XMLCh* getEncodingName() const;
396
397    /**
398     * @return return the transcoder used internally for transcoding the formatter conent
399     */
400    inline const XMLTranscoder*   getTranscoder() const;
401
402   //@}
403
404    // -----------------------------------------------------------------------
405    //  Setter methods
406    // -----------------------------------------------------------------------
407    /** @name Setter methods */
408    //@{
409    /**
410     * @param newFlags set the escape style for the follow-on formatted content
411     */
412    void setEscapeFlags
413    (
414        const   EscapeFlags     newFlags
415    );
416
417    /**
418     * @param newFlags set the reaction for unrepresentable character
419     */
420    void setUnRepFlags
421    (
422        const   UnRepFlags      newFlags
423    );
424
425    /**
426     * @param newFlags set the escape style for the follow-on formatted content
427     * @see setEscapeFlags
428     */
429    XMLFormatter& operator<<
430    (
431        const   EscapeFlags     newFlags
432    );
433
434    /**
435     * @param newFlags set the reaction for unrepresentable character
436     * @see setUnRepFlags
437     */
438    XMLFormatter& operator<<
439    (
440        const   UnRepFlags      newFlags
441    );
442    //@}
443
444
445private :
446    // -----------------------------------------------------------------------
447    //  Unimplemented constructors and operators
448    // -----------------------------------------------------------------------
449    XMLFormatter();
450    XMLFormatter(const XMLFormatter&);
451    XMLFormatter& operator=(const XMLFormatter&);
452
453
454    // -----------------------------------------------------------------------
455    //  Private class constants
456    // -----------------------------------------------------------------------
457    enum Constants
458    {
459        kTmpBufSize     = 16 * 1024
460    };
461
462
463    // -----------------------------------------------------------------------
464    //  Private helper methods
465    // -----------------------------------------------------------------------
466    const XMLByte* getCharRef(unsigned int & count,
467                              XMLByte*      &ref,
468                              const XMLCh *  stdRef); 
469 
470    void writeCharRef(const XMLCh &toWrite);
471    void writeCharRef(unsigned long toWrite);
472
473    bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
474                    , const XMLCh                     toCheck);
475                             
476
477    unsigned int handleUnEscapedChars(const XMLCh *                  srcPtr,
478                                      const unsigned int             count,
479                                      const UnRepFlags               unrepFlags);
480
481    void specialFormat
482    (
483        const   XMLCh* const    toFormat
484        , const unsigned int    count
485        , const EscapeFlags     escapeFlags
486    );
487
488
489    // -----------------------------------------------------------------------
490    //  Private, non-virtual methods
491    //
492    //  fEscapeFlags
493    //      The escape flags we were told to use in formatting. These are
494    //      defaults set in the ctor, which can be overridden on a particular
495    //      call.
496    //
497    //  fOutEncoding
498    //      This the name of the output encoding. Saved mainly for meaningful
499    //      error messages.
500    //
501    //  fTarget
502    //      This is the target object for the formatting operation.
503    //
504    //  fUnRepFlags
505    //      The unrepresentable flags that indicate how to react when a
506    //      character cannot be represented in the target encoding.
507    //
508    //  fXCoder
509    //      This the transcoder that we will use. It is created using the
510    //      encoding name we were told to use.
511    //
512    //  fTmpBuf
513    //      An output buffer that we use to transcode chars into before we
514    //      send them off to be output.
515    //
516    //  fAposRef
517    //  fAmpRef
518    //  fGTRef
519    //  fLTRef
520    //  fQuoteRef
521    //      These are character refs for the standard char refs, in the
522    //      output encoding. They are faulted in as required, by transcoding
523    //      them from fixed Unicode versions.
524    //
525    //  fIsXML11
526    //      for performance reason, we do not store the actual version string
527    //      and do the string comparison again and again.
528    //
529    // -----------------------------------------------------------------------
530    EscapeFlags                 fEscapeFlags;
531    XMLCh*                      fOutEncoding;
532    XMLFormatTarget*            fTarget;
533    UnRepFlags                  fUnRepFlags;
534    XMLTranscoder*              fXCoder;
535    XMLByte                     fTmpBuf[kTmpBufSize + 4];
536    XMLByte*                    fAposRef;
537    unsigned int                fAposLen;
538    XMLByte*                    fAmpRef;
539    unsigned int                fAmpLen;
540    XMLByte*                    fGTRef;
541    unsigned int                fGTLen;
542    XMLByte*                    fLTRef;
543    unsigned int                fLTLen;
544    XMLByte*                    fQuoteRef;
545    unsigned int                fQuoteLen;
546    bool                        fIsXML11;
547    MemoryManager*              fMemoryManager;
548};
549
550
551class XMLPARSER_EXPORT XMLFormatTarget : public XMemory
552{
553public:
554    // -----------------------------------------------------------------------
555    //  Constructors and Destructor
556    // -----------------------------------------------------------------------
557    virtual ~XMLFormatTarget() {}
558
559
560    // -----------------------------------------------------------------------
561    //  Virtual interface
562    // -----------------------------------------------------------------------
563    virtual void writeChars
564    (
565          const XMLByte* const      toWrite
566        , const unsigned int        count
567        ,       XMLFormatter* const formatter
568    ) = 0;
569
570    virtual void flush() {};
571
572
573protected :
574    // -----------------------------------------------------------------------
575    //  Hidden constructors and operators
576    // -----------------------------------------------------------------------
577    XMLFormatTarget() {};
578
579private:
580    // -----------------------------------------------------------------------
581    //  Unimplemented constructors and operators
582    // -----------------------------------------------------------------------
583    XMLFormatTarget(const XMLFormatTarget&);
584    XMLFormatTarget& operator=(const XMLFormatTarget&);
585};
586
587
588// ---------------------------------------------------------------------------
589//  XMLFormatter: Getter methods
590// ---------------------------------------------------------------------------
591inline const XMLCh* XMLFormatter::getEncodingName() const
592{
593    return fOutEncoding;
594}
595
596inline const XMLTranscoder* XMLFormatter::getTranscoder() const
597{
598    return fXCoder;
599}
600
601// ---------------------------------------------------------------------------
602//  XMLFormatter: Setter methods
603// ---------------------------------------------------------------------------
604inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags)
605{
606    fEscapeFlags = newFlags;
607}
608
609inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags)
610{
611    fUnRepFlags = newFlags;
612}
613
614
615inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags)
616{
617    fEscapeFlags = newFlags;
618    return *this;
619}
620
621inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags)
622{
623    fUnRepFlags = newFlags;
624    return *this;
625}
626
627XERCES_CPP_NAMESPACE_END
628
629#endif
Note: See TracBrowser for help on using the repository browser.