source: NonGTP/Xerces/xerces/include/xercesc/framework/XMLFormatter.hpp @ 358

Revision 358, 19.2 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2000,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Log: XMLFormatter.hpp,v $
19 * Revision 1.21  2004/09/08 13:55:59  peiyongz
20 * Apache License Version 2.0
21 *
22 * Revision 1.20  2004/01/29 11:46:29  cargilld
23 * Code cleanup changes to get rid of various compiler diagnostic messages.
24 *
25 * Revision 1.19  2003/12/01 23:23:25  neilg
26 * fix for bug 25118; thanks to Jeroen Witmond
27 *
28 * Revision 1.18  2003/10/10 02:06:09  neilg
29 * fix for bug 21780; thanks to David Cargill
30 *
31 * Revision 1.17  2003/09/22 08:50:04  gareth
32 * doc fix
33 *
34 * Revision 1.16  2003/09/08 21:48:36  peiyongz
35 * Restore pre2.3 constructors
36 *
37 * Revision 1.15  2003/05/30 16:11:43  gareth
38 * Fixes so we compile under VC7.1. Patch by Alberto Massari.
39 *
40 * Revision 1.14  2003/05/16 21:36:55  knoaman
41 * Memory manager implementation: Modify constructors to pass in the memory manager.
42 *
43 * Revision 1.13  2003/05/15 18:26:07  knoaman
44 * Partial implementation of the configurable memory manager.
45 *
46 * Revision 1.12  2003/03/17 03:19:52  peiyongz
47 * Bug#18051 memory leakage in XMLFormatter
48 *
49 * Revision 1.11  2003/03/16 06:00:43  peiyongz
50 * Bug#17983 Formatter does not escape control characters
51 *
52 * Revision 1.10  2003/03/11 12:58:36  tng
53 * Fix compilation error on AIX.
54 *
55 * Revision 1.9  2003/03/07 21:42:37  tng
56 * [Bug 17589] Refactoring ... .  Patch from Jacques Legare.
57 *
58 * Revision 1.8  2003/03/07 18:08:10  tng
59 * Return a reference instead of void for operator=
60 *
61 * Revision 1.7  2003/01/31 00:30:48  jberry
62 * Syntax error in declaration
63 *
64 * Revision 1.6  2003/01/28 18:32:33  peiyongz
65 * Bug#13694: Allow Xerces to write the BOM to XML files
66 *
67 * Revision 1.5  2003/01/24 20:20:22  tng
68 * Add method flush to XMLFormatTarget
69 *
70 * Revision 1.4  2002/11/04 15:00:21  tng
71 * C++ Namespace Support.
72 *
73 * Revision 1.3  2002/07/30 16:29:16  tng
74 * [Bug 8550] No explanation of XMLFormatter escape options.
75 *
76 * Revision 1.2  2002/06/21 19:31:23  peiyongz
77 * getTranscoder() added;
78 *
79 * Revision 1.1.1.1  2002/02/01 22:21:52  peiyongz
80 * sane_include
81 *
82 * Revision 1.7  2000/10/17 19:25:38  andyh
83 * XMLFormatTarget, removed version of writeChars with no length.  Can not be
84 * safely used, and obscured other errors.
85 *
86 * Revision 1.6  2000/10/10 23:54:58  andyh
87 * XMLFormatter patch, contributed by Bill Schindler.  Fix problems with
88 * output to multi-byte encodings.
89 *
90 * Revision 1.5  2000/04/07 01:01:56  roddey
91 * Fixed an error message so that it indicated the correct radix for the rep
92 * token. Get all of the basic output formatting functionality in place for
93 * at least ICU and Win32 transcoders.
94 *
95 * Revision 1.4  2000/04/06 23:50:38  roddey
96 * Now the low level formatter handles doing char refs for
97 * unrepresentable chars (in addition to the replacement char style
98 * already done.)
99 *
100 * Revision 1.3  2000/04/06 19:09:21  roddey
101 * Some more improvements to output formatting. Now it will correctly
102 * handle doing the 'replacement char' style of dealing with chars
103 * that are unrepresentable.
104 *
105 * Revision 1.2  2000/04/05 00:20:16  roddey
106 * More updates for the low level formatted output support
107 *
108 * Revision 1.1  2000/03/28 19:43:17  roddey
109 * Fixes for signed/unsigned warnings. New work for two way transcoding
110 * stuff.
111 *
112 */
113
114#if !defined(XMLFORMATTER_HPP)
115#define XMLFORMATTER_HPP
116
117#include <xercesc/util/PlatformUtils.hpp>
118
119XERCES_CPP_NAMESPACE_BEGIN
120
121class XMLFormatTarget;
122class XMLTranscoder;
123
124/**
125 *  This class provides the basic formatting capabilities that are required
126 *  to turn the Unicode based XML data from the parsers into a form that can
127 *  be used on non-Unicode based systems, that is, into local or generic text
128 *  encodings.
129 *
130 *  A number of flags are provided to control whether various optional
131 *  formatting operations are performed.
132 */
133class XMLPARSER_EXPORT XMLFormatter : public XMemory
134{
135public:
136    // -----------------------------------------------------------------------
137    //  Class types
138    // -----------------------------------------------------------------------
139    /** @name Public Contants */
140    //@{
141    /**
142     * EscapeFlags - Different styles of escape flags to control various formatting.
143     *
144     * <p><code>NoEscapes:</code>
145     * No character needs to be escaped.   Just write them out as is.</p>
146     * <p><code>StdEscapes:</code>
147     * The following characters need to be escaped:</p>
148     * <table border='1'>
149     * <tr>
150     * <td>character</td>
151     * <td>should be escaped and written as</td>
152     * </tr>
153     * <tr>
154     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
155     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
156     * </tr>
157     * <tr>
158     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
159     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
160     * </tr>
161     * <tr>
162     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
163     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
164     * </tr>
165     * <tr>
166     * <td valign='top' rowspan='1' colspan='1'>&lt;</td>
167     * <td valign='top' rowspan='1' colspan='1'>&amp;lt;</td>
168     * </tr>
169     * <tr>
170     * <td valign='top' rowspan='1' colspan='1'>&apos;</td>
171     * <td valign='top' rowspan='1' colspan='1'>&amp;apos;</td>
172     * </tr>
173     * </table>
174     * <p><code>AttrEscapes:</code>
175     * The following characters need to be escaped:</p>
176     * <table border='1'>
177     * <tr>
178     * <td>character</td>
179     * <td>should be escaped and written as</td>
180     * </tr>
181     * <tr>
182     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
183     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
184     * </tr>
185     * <tr>
186     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
187     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
188     * </tr>
189     * <tr>
190     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
191     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
192     * </tr>
193     * </table>
194     * <p><code>CharEscapes:</code>
195     * The following characters need to be escaped:</p>
196     * <table border='1'>
197     * <tr>
198     * <td>character</td>
199     * <td>should be escaped and written as</td>
200     * </tr>
201     * <tr>
202     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
203     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
204     * </tr>
205     * <tr>
206     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
207     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
208     * </tr>
209     * </table>
210     * <p><code>EscapeFlags_Count:</code>
211     * Special value, do not use directly.</p>
212     * <p><code>DefaultEscape:</code>
213     * Special value, do not use directly.</p>
214     *
215     */
216    enum EscapeFlags
217    {
218        NoEscapes
219        , StdEscapes
220        , AttrEscapes
221        , CharEscapes
222
223        // Special values, don't use directly
224        , EscapeFlags_Count
225        , DefaultEscape     = 999
226    };
227
228    /**
229     * UnRepFlags
230     *
231     * The unrepresentable flags that indicate how to react when a
232     * character cannot be represented in the target encoding.
233     *
234     * <p><code>UnRep_Fail:</code>
235     * Fail the operation.</p>
236     * <p><code>UnRep_CharRef:</code>
237     * Display the unrepresented character as reference.</p>
238     * <p><code>UnRep_Replace:</code>
239     * Replace the unrepresented character with the replacement character.</p>
240     * <p><code>DefaultUnRep:</code>
241     * Special value, do not use directly.</p>
242     *
243     */
244    enum UnRepFlags
245    {
246        UnRep_Fail
247        , UnRep_CharRef
248        , UnRep_Replace
249
250        , DefaultUnRep      = 999
251    };
252    //@}
253
254
255    // -----------------------------------------------------------------------
256    //  Constructors and Destructor
257    // -----------------------------------------------------------------------
258    /** @name Constructor and Destructor */
259    //@{
260    /**
261     * @param outEncoding the encoding for the formatted content
262     * @param docVersion 
263     * @param target      the formatTarget where the formatted content is written to
264     * @param escapeFlags the escape style for certain character
265     * @param unrepFlags  the reaction to unrepresentable character
266     * @param manager     Pointer to the memory manager to be used to
267     *                    allocate objects.
268     */
269    XMLFormatter
270    (
271        const   XMLCh* const            outEncoding
272        , const XMLCh* const            docVersion
273        ,       XMLFormatTarget* const  target
274        , const EscapeFlags             escapeFlags = NoEscapes
275        , const UnRepFlags              unrepFlags = UnRep_Fail
276        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
277    );
278
279    XMLFormatter
280    (
281        const   char* const             outEncoding
282        , const char* const             docVersion
283        ,       XMLFormatTarget* const  target
284        , const EscapeFlags             escapeFlags = NoEscapes
285        , const UnRepFlags              unrepFlags = UnRep_Fail
286        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
287    );
288
289    XMLFormatter
290    (
291        const   XMLCh* const            outEncoding
292        ,       XMLFormatTarget* const  target
293        , const EscapeFlags             escapeFlags = NoEscapes
294        , const UnRepFlags              unrepFlags = UnRep_Fail
295        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
296    );
297
298    XMLFormatter
299    (
300        const   char* const             outEncoding
301        ,       XMLFormatTarget* const  target
302        , const EscapeFlags             escapeFlags = NoEscapes
303        , const UnRepFlags              unrepFlags = UnRep_Fail
304        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
305    );
306   
307    ~XMLFormatter();
308    //@}
309
310
311    // -----------------------------------------------------------------------
312    //  Formatting methods
313    // -----------------------------------------------------------------------
314    /** @name Formatting methods */
315    //@{
316    /**
317     * @param toFormat the string to be formatted
318     * @param count    length of the string
319     * @param escapeFlags the escape style for formatting toFormat
320     * @param unrepFlags the reaction for any unrepresentable character in toFormat
321     *
322     */
323    void formatBuf
324    (
325        const   XMLCh* const    toFormat
326        , const unsigned int    count
327        , const EscapeFlags     escapeFlags = DefaultEscape
328        , const UnRepFlags      unrepFlags = DefaultUnRep
329    );
330
331    /**
332     * @see formatBuf
333     */
334    XMLFormatter& operator<<
335    (
336        const   XMLCh* const    toFormat
337    );
338
339    XMLFormatter& operator<<
340    (
341        const   XMLCh           toFormat
342    );
343
344    void writeBOM(const XMLByte* const toFormat
345                , const unsigned int   count);
346
347    //@}
348
349    // -----------------------------------------------------------------------
350    //  Getter methods
351    // -----------------------------------------------------------------------
352    /** @name Getter methods */
353    //@{
354    /**
355     * @return return the encoding set for the formatted content
356     */
357
358    const XMLCh* getEncodingName() const;
359
360    /**
361     * @return return the transcoder used internally for transcoding the formatter conent
362     */
363    inline const XMLTranscoder*   getTranscoder() const;
364
365   //@}
366
367    // -----------------------------------------------------------------------
368    //  Setter methods
369    // -----------------------------------------------------------------------
370    /** @name Setter methods */
371    //@{
372    /**
373     * @param newFlags set the escape style for the follow-on formatted content
374     */
375    void setEscapeFlags
376    (
377        const   EscapeFlags     newFlags
378    );
379
380    /**
381     * @param newFlags set the reaction for unrepresentable character
382     */
383    void setUnRepFlags
384    (
385        const   UnRepFlags      newFlags
386    );
387
388    /**
389     * @param newFlags set the escape style for the follow-on formatted content
390     * @see setEscapeFlags
391     */
392    XMLFormatter& operator<<
393    (
394        const   EscapeFlags     newFlags
395    );
396
397    /**
398     * @param newFlags set the reaction for unrepresentable character
399     * @see setUnRepFlags
400     */
401    XMLFormatter& operator<<
402    (
403        const   UnRepFlags      newFlags
404    );
405    //@}
406
407
408private :
409    // -----------------------------------------------------------------------
410    //  Unimplemented constructors and operators
411    // -----------------------------------------------------------------------
412    XMLFormatter();
413    XMLFormatter(const XMLFormatter&);
414    XMLFormatter& operator=(const XMLFormatter&);
415
416
417    // -----------------------------------------------------------------------
418    //  Private class constants
419    // -----------------------------------------------------------------------
420    enum Constants
421    {
422        kTmpBufSize     = 16 * 1024
423    };
424
425
426    // -----------------------------------------------------------------------
427    //  Private helper methods
428    // -----------------------------------------------------------------------
429    const XMLByte* getCharRef(unsigned int & count,
430                              XMLByte*      &ref,
431                              const XMLCh *  stdRef); 
432 
433    void writeCharRef(const XMLCh &toWrite);
434    void writeCharRef(unsigned long toWrite);
435
436    bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
437                    , const XMLCh                     toCheck);
438                             
439
440    unsigned int handleUnEscapedChars(const XMLCh *                  srcPtr,
441                                      const unsigned int             count,
442                                      const UnRepFlags               unrepFlags);
443
444    void specialFormat
445    (
446        const   XMLCh* const    toFormat
447        , const unsigned int    count
448        , const EscapeFlags     escapeFlags
449    );
450
451
452    // -----------------------------------------------------------------------
453    //  Private, non-virtual methods
454    //
455    //  fEscapeFlags
456    //      The escape flags we were told to use in formatting. These are
457    //      defaults set in the ctor, which can be overridden on a particular
458    //      call.
459    //
460    //  fOutEncoding
461    //      This the name of the output encoding. Saved mainly for meaningful
462    //      error messages.
463    //
464    //  fTarget
465    //      This is the target object for the formatting operation.
466    //
467    //  fUnRepFlags
468    //      The unrepresentable flags that indicate how to react when a
469    //      character cannot be represented in the target encoding.
470    //
471    //  fXCoder
472    //      This the transcoder that we will use. It is created using the
473    //      encoding name we were told to use.
474    //
475    //  fTmpBuf
476    //      An output buffer that we use to transcode chars into before we
477    //      send them off to be output.
478    //
479    //  fAposRef
480    //  fAmpRef
481    //  fGTRef
482    //  fLTRef
483    //  fQuoteRef
484    //      These are character refs for the standard char refs, in the
485    //      output encoding. They are faulted in as required, by transcoding
486    //      them from fixed Unicode versions.
487    //
488    //  fIsXML11
489    //      for performance reason, we do not store the actual version string
490    //      and do the string comparison again and again.
491    //
492    // -----------------------------------------------------------------------
493    EscapeFlags                 fEscapeFlags;
494    XMLCh*                      fOutEncoding;
495    XMLFormatTarget*            fTarget;
496    UnRepFlags                  fUnRepFlags;
497    XMLTranscoder*              fXCoder;
498    XMLByte                     fTmpBuf[kTmpBufSize + 4];
499    XMLByte*                    fAposRef;
500    unsigned int                fAposLen;
501    XMLByte*                    fAmpRef;
502    unsigned int                fAmpLen;
503    XMLByte*                    fGTRef;
504    unsigned int                fGTLen;
505    XMLByte*                    fLTRef;
506    unsigned int                fLTLen;
507    XMLByte*                    fQuoteRef;
508    unsigned int                fQuoteLen;
509    bool                        fIsXML11;
510    MemoryManager*              fMemoryManager;
511};
512
513
514class XMLPARSER_EXPORT XMLFormatTarget : public XMemory
515{
516public:
517    // -----------------------------------------------------------------------
518    //  Constructors and Destructor
519    // -----------------------------------------------------------------------
520    virtual ~XMLFormatTarget() {}
521
522
523    // -----------------------------------------------------------------------
524    //  Virtual interface
525    // -----------------------------------------------------------------------
526    virtual void writeChars
527    (
528          const XMLByte* const      toWrite
529        , const unsigned int        count
530        ,       XMLFormatter* const formatter
531    ) = 0;
532
533    virtual void flush() {};
534
535
536protected :
537    // -----------------------------------------------------------------------
538    //  Hidden constructors and operators
539    // -----------------------------------------------------------------------
540    XMLFormatTarget() {};
541
542private:
543    // -----------------------------------------------------------------------
544    //  Unimplemented constructors and operators
545    // -----------------------------------------------------------------------
546    XMLFormatTarget(const XMLFormatTarget&);
547    XMLFormatTarget& operator=(const XMLFormatTarget&);
548};
549
550
551// ---------------------------------------------------------------------------
552//  XMLFormatter: Getter methods
553// ---------------------------------------------------------------------------
554inline const XMLCh* XMLFormatter::getEncodingName() const
555{
556    return fOutEncoding;
557}
558
559inline const XMLTranscoder* XMLFormatter::getTranscoder() const
560{
561    return fXCoder;
562}
563
564// ---------------------------------------------------------------------------
565//  XMLFormatter: Setter methods
566// ---------------------------------------------------------------------------
567inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags)
568{
569    fEscapeFlags = newFlags;
570}
571
572inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags)
573{
574    fUnRepFlags = newFlags;
575}
576
577
578inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags)
579{
580    fEscapeFlags = newFlags;
581    return *this;
582}
583
584inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags)
585{
586    fUnRepFlags = newFlags;
587    return *this;
588}
589
590XERCES_CPP_NAMESPACE_END
591
592#endif
Note: See TracBrowser for help on using the repository browser.