source: NonGTP/Xerces/xerces-c_2_8_0/include/xercesc/framework/XMLFormatter.hpp @ 2674

Revision 2674, 16.6 KB checked in by mattausch, 16 years ago (diff)
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XMLFormatter.hpp 568078 2007-08-21 11:43:25Z amassari $
20 */
21
22#if !defined(XMLFORMATTER_HPP)
23#define XMLFORMATTER_HPP
24
25#include <xercesc/util/PlatformUtils.hpp>
26
27XERCES_CPP_NAMESPACE_BEGIN
28
29class XMLFormatTarget;
30class XMLTranscoder;
31
32/**
33 *  This class provides the basic formatting capabilities that are required
34 *  to turn the Unicode based XML data from the parsers into a form that can
35 *  be used on non-Unicode based systems, that is, into local or generic text
36 *  encodings.
37 *
38 *  A number of flags are provided to control whether various optional
39 *  formatting operations are performed.
40 */
41class XMLPARSER_EXPORT XMLFormatter : public XMemory
42{
43public:
44    // -----------------------------------------------------------------------
45    //  Class types
46    // -----------------------------------------------------------------------
47    /** @name Public Contants */
48    //@{
49    /**
50     * EscapeFlags - Different styles of escape flags to control various formatting.
51     *
52     * <p><code>NoEscapes:</code>
53     * No character needs to be escaped.   Just write them out as is.</p>
54     * <p><code>StdEscapes:</code>
55     * The following characters need to be escaped:</p>
56     * <table border='1'>
57     * <tr>
58     * <td>character</td>
59     * <td>should be escaped and written as</td>
60     * </tr>
61     * <tr>
62     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
63     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
64     * </tr>
65     * <tr>
66     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
67     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
68     * </tr>
69     * <tr>
70     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
71     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
72     * </tr>
73     * <tr>
74     * <td valign='top' rowspan='1' colspan='1'>&lt;</td>
75     * <td valign='top' rowspan='1' colspan='1'>&amp;lt;</td>
76     * </tr>
77     * <tr>
78     * <td valign='top' rowspan='1' colspan='1'>&apos;</td>
79     * <td valign='top' rowspan='1' colspan='1'>&amp;apos;</td>
80     * </tr>
81     * </table>
82     * <p><code>AttrEscapes:</code>
83     * The following characters need to be escaped:</p>
84     * <table border='1'>
85     * <tr>
86     * <td>character</td>
87     * <td>should be escaped and written as</td>
88     * </tr>
89     * <tr>
90     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
91     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
92     * </tr>
93     * <tr>
94     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
95     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
96     * </tr>
97     * <tr>
98     * <td valign='top' rowspan='1' colspan='1'>&quot;</td>
99     * <td valign='top' rowspan='1' colspan='1'>&amp;quot;</td>
100     * </tr>
101     * </table>
102     * <p><code>CharEscapes:</code>
103     * The following characters need to be escaped:</p>
104     * <table border='1'>
105     * <tr>
106     * <td>character</td>
107     * <td>should be escaped and written as</td>
108     * </tr>
109     * <tr>
110     * <td valign='top' rowspan='1' colspan='1'>&amp;</td>
111     * <td valign='top' rowspan='1' colspan='1'>&amp;amp;</td>
112     * </tr>
113     * <tr>
114     * <td valign='top' rowspan='1' colspan='1'>&gt;</td>
115     * <td valign='top' rowspan='1' colspan='1'>&amp;gt;</td>
116     * </tr>
117     * </table>
118     * <p><code>EscapeFlags_Count:</code>
119     * Special value, do not use directly.</p>
120     * <p><code>DefaultEscape:</code>
121     * Special value, do not use directly.</p>
122     *
123     */
124    enum EscapeFlags
125    {
126        NoEscapes
127        , StdEscapes
128        , AttrEscapes
129        , CharEscapes
130
131        // Special values, don't use directly
132        , EscapeFlags_Count
133        , DefaultEscape     = 999
134    };
135
136    /**
137     * UnRepFlags
138     *
139     * The unrepresentable flags that indicate how to react when a
140     * character cannot be represented in the target encoding.
141     *
142     * <p><code>UnRep_Fail:</code>
143     * Fail the operation.</p>
144     * <p><code>UnRep_CharRef:</code>
145     * Display the unrepresented character as reference.</p>
146     * <p><code>UnRep_Replace:</code>
147     * Replace the unrepresented character with the replacement character.</p>
148     * <p><code>DefaultUnRep:</code>
149     * Special value, do not use directly.</p>
150     *
151     */
152    enum UnRepFlags
153    {
154        UnRep_Fail
155        , UnRep_CharRef
156        , UnRep_Replace
157
158        , DefaultUnRep      = 999
159    };
160    //@}
161
162
163    // -----------------------------------------------------------------------
164    //  Constructors and Destructor
165    // -----------------------------------------------------------------------
166    /** @name Constructor and Destructor */
167    //@{
168    /**
169     * @param outEncoding the encoding for the formatted content
170     * @param docVersion 
171     * @param target      the formatTarget where the formatted content is written to
172     * @param escapeFlags the escape style for certain character
173     * @param unrepFlags  the reaction to unrepresentable character
174     * @param manager     Pointer to the memory manager to be used to
175     *                    allocate objects.
176     */
177    XMLFormatter
178    (
179        const   XMLCh* const            outEncoding
180        , const XMLCh* const            docVersion
181        ,       XMLFormatTarget* const  target
182        , const EscapeFlags             escapeFlags = NoEscapes
183        , const UnRepFlags              unrepFlags = UnRep_Fail
184        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
185    );
186
187    XMLFormatter
188    (
189        const   char* const             outEncoding
190        , const char* const             docVersion
191        ,       XMLFormatTarget* const  target
192        , const EscapeFlags             escapeFlags = NoEscapes
193        , const UnRepFlags              unrepFlags = UnRep_Fail
194        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
195    );
196
197    XMLFormatter
198    (
199        const   XMLCh* const            outEncoding
200        ,       XMLFormatTarget* const  target
201        , const EscapeFlags             escapeFlags = NoEscapes
202        , const UnRepFlags              unrepFlags = UnRep_Fail
203        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
204    );
205
206    XMLFormatter
207    (
208        const   char* const             outEncoding
209        ,       XMLFormatTarget* const  target
210        , const EscapeFlags             escapeFlags = NoEscapes
211        , const UnRepFlags              unrepFlags = UnRep_Fail
212        ,       MemoryManager* const    manager = XMLPlatformUtils::fgMemoryManager
213    );
214   
215    ~XMLFormatter();
216    //@}
217
218
219    // -----------------------------------------------------------------------
220    //  Formatting methods
221    // -----------------------------------------------------------------------
222    /** @name Formatting methods */
223    //@{
224    /**
225     * @param toFormat the string to be formatted
226     * @param count    length of the string
227     * @param escapeFlags the escape style for formatting toFormat
228     * @param unrepFlags the reaction for any unrepresentable character in toFormat
229     *
230     */
231    void formatBuf
232    (
233        const   XMLCh* const    toFormat
234        , const unsigned int    count
235        , const EscapeFlags     escapeFlags = DefaultEscape
236        , const UnRepFlags      unrepFlags = DefaultUnRep
237    );
238
239    /**
240     * @see formatBuf
241     */
242    XMLFormatter& operator<<
243    (
244        const   XMLCh* const    toFormat
245    );
246
247    XMLFormatter& operator<<
248    (
249        const   XMLCh           toFormat
250    );
251
252    void writeBOM(const XMLByte* const toFormat
253                , const unsigned int   count);
254
255    //@}
256
257    // -----------------------------------------------------------------------
258    //  Getter methods
259    // -----------------------------------------------------------------------
260    /** @name Getter methods */
261    //@{
262    /**
263     * @return return the encoding set for the formatted content
264     */
265
266    const XMLCh* getEncodingName() const;
267
268    /**
269     * @return return the transcoder used internally for transcoding the formatter conent
270     */
271    inline const XMLTranscoder*   getTranscoder() const;
272
273   //@}
274
275    // -----------------------------------------------------------------------
276    //  Setter methods
277    // -----------------------------------------------------------------------
278    /** @name Setter methods */
279    //@{
280    /**
281     * @param newFlags set the escape style for the follow-on formatted content
282     */
283    void setEscapeFlags
284    (
285        const   EscapeFlags     newFlags
286    );
287
288    /**
289     * @param newFlags set the reaction for unrepresentable character
290     */
291    void setUnRepFlags
292    (
293        const   UnRepFlags      newFlags
294    );
295
296    /**
297     * @param newFlags set the escape style for the follow-on formatted content
298     * @see setEscapeFlags
299     */
300    XMLFormatter& operator<<
301    (
302        const   EscapeFlags     newFlags
303    );
304
305    /**
306     * @param newFlags set the reaction for unrepresentable character
307     * @see setUnRepFlags
308     */
309    XMLFormatter& operator<<
310    (
311        const   UnRepFlags      newFlags
312    );
313    //@}
314
315
316    // -----------------------------------------------------------------------
317    //  Getter methods
318    // -----------------------------------------------------------------------
319    /** @name Setter methods */
320    //@{
321    /**
322     * @return return the escape style for the formatted content
323     */
324    EscapeFlags getEscapeFlags() const;
325
326    /**
327     * @return return the reaction for unrepresentable character
328     */
329    UnRepFlags getUnRepFlags() const;
330    //@}
331
332private :
333    // -----------------------------------------------------------------------
334    //  Unimplemented constructors and operators
335    // -----------------------------------------------------------------------
336    XMLFormatter();
337    XMLFormatter(const XMLFormatter&);
338    XMLFormatter& operator=(const XMLFormatter&);
339
340
341    // -----------------------------------------------------------------------
342    //  Private class constants
343    // -----------------------------------------------------------------------
344    enum Constants
345    {
346        kTmpBufSize     = 16 * 1024
347    };
348
349
350    // -----------------------------------------------------------------------
351    //  Private helper methods
352    // -----------------------------------------------------------------------
353    const XMLByte* getCharRef(unsigned int & count,
354                              XMLByte*      &ref,
355                              const XMLCh *  stdRef); 
356 
357    void writeCharRef(const XMLCh &toWrite);
358    void writeCharRef(unsigned long toWrite);
359
360    bool inEscapeList(const XMLFormatter::EscapeFlags escStyle
361                    , const XMLCh                     toCheck);
362                             
363
364    unsigned int handleUnEscapedChars(const XMLCh *                  srcPtr,
365                                      const unsigned int             count,
366                                      const UnRepFlags               unrepFlags);
367
368    void specialFormat
369    (
370        const   XMLCh* const    toFormat
371        , const unsigned int    count
372        , const EscapeFlags     escapeFlags
373    );
374
375
376    // -----------------------------------------------------------------------
377    //  Private, non-virtual methods
378    //
379    //  fEscapeFlags
380    //      The escape flags we were told to use in formatting. These are
381    //      defaults set in the ctor, which can be overridden on a particular
382    //      call.
383    //
384    //  fOutEncoding
385    //      This the name of the output encoding. Saved mainly for meaningful
386    //      error messages.
387    //
388    //  fTarget
389    //      This is the target object for the formatting operation.
390    //
391    //  fUnRepFlags
392    //      The unrepresentable flags that indicate how to react when a
393    //      character cannot be represented in the target encoding.
394    //
395    //  fXCoder
396    //      This the transcoder that we will use. It is created using the
397    //      encoding name we were told to use.
398    //
399    //  fTmpBuf
400    //      An output buffer that we use to transcode chars into before we
401    //      send them off to be output.
402    //
403    //  fAposRef
404    //  fAmpRef
405    //  fGTRef
406    //  fLTRef
407    //  fQuoteRef
408    //      These are character refs for the standard char refs, in the
409    //      output encoding. They are faulted in as required, by transcoding
410    //      them from fixed Unicode versions.
411    //
412    //  fIsXML11
413    //      for performance reason, we do not store the actual version string
414    //      and do the string comparison again and again.
415    //
416    // -----------------------------------------------------------------------
417    EscapeFlags                 fEscapeFlags;
418    XMLCh*                      fOutEncoding;
419    XMLFormatTarget*            fTarget;
420    UnRepFlags                  fUnRepFlags;
421    XMLTranscoder*              fXCoder;
422    XMLByte                     fTmpBuf[kTmpBufSize + 4];
423    XMLByte*                    fAposRef;
424    unsigned int                fAposLen;
425    XMLByte*                    fAmpRef;
426    unsigned int                fAmpLen;
427    XMLByte*                    fGTRef;
428    unsigned int                fGTLen;
429    XMLByte*                    fLTRef;
430    unsigned int                fLTLen;
431    XMLByte*                    fQuoteRef;
432    unsigned int                fQuoteLen;
433    bool                        fIsXML11;
434    MemoryManager*              fMemoryManager;
435};
436
437
438class XMLPARSER_EXPORT XMLFormatTarget : public XMemory
439{
440public:
441    // -----------------------------------------------------------------------
442    //  Constructors and Destructor
443    // -----------------------------------------------------------------------
444    virtual ~XMLFormatTarget() {}
445
446
447    // -----------------------------------------------------------------------
448    //  Virtual interface
449    // -----------------------------------------------------------------------
450    virtual void writeChars
451    (
452          const XMLByte* const      toWrite
453        , const unsigned int        count
454        ,       XMLFormatter* const formatter
455    ) = 0;
456
457    virtual void flush() {};
458
459
460protected :
461    // -----------------------------------------------------------------------
462    //  Hidden constructors and operators
463    // -----------------------------------------------------------------------
464    XMLFormatTarget() {};
465
466private:
467    // -----------------------------------------------------------------------
468    //  Unimplemented constructors and operators
469    // -----------------------------------------------------------------------
470    XMLFormatTarget(const XMLFormatTarget&);
471    XMLFormatTarget& operator=(const XMLFormatTarget&);
472};
473
474
475// ---------------------------------------------------------------------------
476//  XMLFormatter: Getter methods
477// ---------------------------------------------------------------------------
478inline const XMLCh* XMLFormatter::getEncodingName() const
479{
480    return fOutEncoding;
481}
482
483inline const XMLTranscoder* XMLFormatter::getTranscoder() const
484{
485    return fXCoder;
486}
487
488// ---------------------------------------------------------------------------
489//  XMLFormatter: Setter methods
490// ---------------------------------------------------------------------------
491inline void XMLFormatter::setEscapeFlags(const EscapeFlags newFlags)
492{
493    fEscapeFlags = newFlags;
494}
495
496inline void XMLFormatter::setUnRepFlags(const UnRepFlags newFlags)
497{
498    fUnRepFlags = newFlags;
499}
500
501
502inline XMLFormatter& XMLFormatter::operator<<(const EscapeFlags newFlags)
503{
504    fEscapeFlags = newFlags;
505    return *this;
506}
507
508inline XMLFormatter& XMLFormatter::operator<<(const UnRepFlags newFlags)
509{
510    fUnRepFlags = newFlags;
511    return *this;
512}
513
514// ---------------------------------------------------------------------------
515//  XMLFormatter: Getter methods
516// ---------------------------------------------------------------------------
517inline XMLFormatter::EscapeFlags XMLFormatter::getEscapeFlags() const
518{
519    return fEscapeFlags;
520}
521
522inline XMLFormatter::UnRepFlags XMLFormatter::getUnRepFlags() const
523{
524    return fUnRepFlags;
525}
526
527XERCES_CPP_NAMESPACE_END
528
529#endif
Note: See TracBrowser for help on using the repository browser.