source: NonGTP/Xerces/xercesc/dom/impl/DOMWriterImpl.hpp @ 188

Revision 188, 22.7 KB checked in by mattausch, 20 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 2002 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Id: DOMWriterImpl.hpp,v 1.18 2003/08/14 16:31:13 gareth Exp $
59 * $Log: DOMWriterImpl.hpp,v $
60 * Revision 1.18  2003/08/14 16:31:13  gareth
61 * Method added to allow serialization of custom nodes from derived classes.
62 *
63 * Revision 1.17  2003/05/29 18:47:52  knoaman
64 * Apply memory manager.
65 *
66 * Revision 1.16  2003/05/22 02:10:51  knoaman
67 * Default the memory manager.
68 *
69 * Revision 1.15  2003/05/15 18:25:54  knoaman
70 * Partial implementation of the configurable memory manager.
71 *
72 * Revision 1.14  2003/05/12 16:08:11  gareth
73 * fix to #18832. Corrected serialization with regards to namespace nodes. Patch by Alby Massari.
74 *
75 * Revision 1.13  2003/03/16 05:42:04  peiyongz
76 * Bug#17983 Formatter does not escape control characters
77 *
78 * Revision 1.12  2003/01/28 18:31:47  peiyongz
79 * Bug#13694: Allow Xerces to write the BOM to XML files
80 *
81 * Revision 1.11  2003/01/20 16:50:13  tng
82 * DOMWriter fix:
83 * 1. wrong wrong nested cdata message
84 * 2. pretty format the cdata section
85 * 3. do not increment error count if warning was issued
86 *
87 * Revision 1.10  2002/12/10 21:01:32  tng
88 * NLS: DOMWriter should use message loader to load message instead of using hardcoded static string
89 *
90 * Revision 1.9  2002/12/09 11:46:08  gareth
91 * More pretty pretty print feature. Patch by Kevin King. Closes bug #13840.
92 *
93 * Revision 1.8  2002/11/04 15:07:35  tng
94 * C++ Namespace Support.
95 *
96 * Revision 1.7  2002/06/25 16:17:16  tng
97 * DOM L3: add release()
98 *
99 * Revision 1.6  2002/06/21 19:33:12  peiyongz
100 * support for feature split_cdata_section and entities revised.
101 *
102 * Revision 1.5  2002/06/17 19:45:58  peiyongz
103 * optimization on fFeatures and featureId introduced
104 *
105 * Revision 1.4  2002/06/14 15:39:02  peiyongz
106 * Fix: Compilation error from ForteC on Solaris2.6
107 *
108 * Revision 1.3  2002/06/10 16:02:21  peiyongz
109 * format-pretty-print partially supported
110 * resolve encoding from DOMDocument Interface
111 *
112 * Revision 1.2  2002/06/05 16:03:03  peiyongz
113 * delete[] used.
114 *
115 * Revision 1.1  2002/05/28 22:39:39  peiyongz
116 * DOM3 Save Interface: DOMWriter/DOMWriterFilter
117 *
118 */
119
120/**
121 *  DOMWriterImpl provides an API for serializing (writing) a DOM document out in
122 * an XML document. The XML data is written to an output stream, the type of
123 * which depends on the specific language bindings in use. During
124 * serialization of XML data, namespace fixup is done when possible.
125 * <p> <code>DOMWriterImpl</code> accepts any node type for serialization. For
126 * nodes of type <code>Document</code> or <code>Entity</code>, well formed
127 * XML will be created if possible. The serialized output for these node
128 * types is either as a Document or an External Entity, respectively, and is
129 * acceptable input for an XML parser. For all other types of nodes the
130 * serialized form is not specified, but should be something useful to a
131 * human for debugging or diagnostic purposes. Note: rigorously designing an
132 * external (source) form for stand-alone node types that don't already have
133 * one defined in [XML 1.0] seems a bit much to take on here.
134 * <p>Within a Document or Entity being serialized, Nodes are processed as
135 * follows: Documents are written including an XML declaration and a DTD
136 * subset, if one exists in the DOM. Writing a document node serializes the
137 * entire document.  Entity nodes, when written directly by
138 * <code>writeNode</code> defined in the <code>DOMWriterImpl</code> interface,
139 * output the entity expansion but no namespace fixup is done. The resulting
140 * output will be valid as an external entity.  Entity Reference nodes are
141 * serialized as an entity reference of the form
142 * <code>"&amp;entityName;"</code> in the output. Child nodes (the
143 * expansion) of the entity reference are ignored.  CDATA sections
144 * containing content characters that can not be represented in the
145 * specified output encoding are handled according to the
146 * "split-cdata-sections" feature. If the feature is <code>true</code>, CDATA
147 * sections are split, and the unrepresentable characters are serialized as
148 * numeric character references in ordinary content. The exact position and
149 * number of splits is not specified. If the feature is <code>false</code>,
150 * unrepresentable characters in a CDATA section are reported as errors. The
151 * error is not recoverable - there is no mechanism for supplying
152 * alternative characters and continuing with the serialization. All other
153 * node types (Element, Text, etc.) are serialized to their corresponding
154 * XML source form.
155 * <p> Within the character data of a document (outside of markup), any
156 * characters that cannot be represented directly are replaced with
157 * character references. Occurrences of '&lt;' and '&amp;' are replaced by
158 * the predefined entities &amp;lt; and &amp;amp;. The other predefined
159 * entities (&amp;gt;, &amp;apos;, etc.) are not used; these characters can be
160 * included directly. Any character that can not be represented directly in
161 * the output character encoding is serialized as a numeric character
162 * reference.
163 * <p> Attributes not containing quotes are serialized in quotes. Attributes
164 * containing quotes but no apostrophes are serialized in apostrophes
165 * (single quotes). Attributes containing both forms of quotes are
166 * serialized in quotes, with quotes within the value represented by the
167 * predefined entity &amp;quot;. Any character that can not be represented
168 * directly in the output character encoding is serialized as a numeric
169 * character reference.
170 * <p> Within markup, but outside of attributes, any occurrence of a character
171 * that cannot be represented in the output character encoding is reported
172 * as an error. An example would be serializing the element
173 * &lt;LaCañada/&gt; with the encoding="us-ascii".
174 * <p> When requested by setting the <code>normalize-characters</code> feature
175 * on <code>DOMWriterImpl</code>, all data to be serialized, both markup and
176 * character data, is W3C Text normalized according to the rules defined in
177 * [CharModel]. The W3C Text normalization process affects only the data as it is being
178 * written; it does not alter the DOM's view of the document after
179 * serialization has completed.
180 * <p>Namespaces are fixed up during serialization, the serialization process
181 * will verify that namespace declarations, namespace prefixes and the
182 * namespace URIs associated with Elements and Attributes are consistent. If
183 * inconsistencies are found, the serialized form of the document will be
184 * altered to remove them. The algorithm used for doing the namespace fixup
185 * while serializing a document is a combination of the algorithms used for
186 * lookupNamespaceURI and lookupNamespacePrefix. previous paragraph to be
187 * defined closer here.
188 * <p>Any changes made affect only the namespace prefixes and declarations
189 * appearing in the serialized data. The DOM's view of the document is not
190 * altered by the serialization operation, and does not reflect any changes
191 * made to namespace declarations or prefixes in the serialized output.
192 * <p> While serializing a document the serializer will write out
193 * non-specified values (such as attributes whose <code>specified</code> is
194 * <code>false</code>) if the <code>output-default-values</code> feature is
195 * set to <code>true</code>. If the <code>output-default-values</code> flag
196 * is set to <code>false</code> and the <code>use-abstract-schema</code>
197 * feature is set to <code>true</code> the abstract schema will be used to
198 * determine if a value is specified or not, if
199 * <code>use-abstract-schema</code> is not set the <code>specified</code>
200 * flag on attribute nodes is used to determine if attribute values should
201 * be written out.
202 * <p> Ref to Core spec (1.1.9, XML namespaces, 5th paragraph) entity ref
203 * description about warning about unbound entity refs. Entity refs are
204 * always serialized as &amp;foo;, also mention this in the load part of
205 * this spec.
206 * <p> When serializing a document the DOMWriterImpl checks to see if the document
207 * element in the document is a DOM Level 1 element or a DOM Level 2 (or
208 * higher) element (this check is done by looking at the localName of the
209 * root element). If the root element is a DOM Level 1 element then the
210 * DOMWriterImpl will issue an error if a DOM Level 2 (or higher) element is
211 * found while serializing. Likewise if the document element is a DOM Level
212 * 2 (or higher) element and the DOMWriterImpl sees a DOM Level 1 element an
213 * error is issued. Mixing DOM Level 1 elements with DOM Level 2 (or higher)
214 * is not supported.
215 * <p> <code>DOMWriterImpl</code>s have a number of named features that can be
216 * queried or set. The name of <code>DOMWriterImpl</code> features must be valid
217 * XML names. Implementation specific features (extensions) should choose an
218 * implementation dependent prefix to avoid name collisions.
219 * <p>Here is a list of properties that must be recognized by all
220 * implementations.
221 * <dl>
222 * <dt><code>"normalize-characters"</code></dt>
223 * <dd>
224 * <dl>
225 * <dt><code>true</code></dt>
226 * <dd>[
227 * optional] (default) Perform the W3C Text Normalization of the characters
228 * in document as they are written out. Only the characters being written
229 * are (potentially) altered. The DOM document itself is unchanged. </dd>
230 * <dt>
231 * <code>false</code></dt>
232 * <dd>[required] do not perform character normalization. </dd>
233 * </dl></dd>
234 * <dt>
235 * <code>"split-cdata-sections"</code></dt>
236 * <dd>
237 * <dl>
238 * <dt><code>true</code></dt>
239 * <dd>[required] (default)
240 * Split CDATA sections containing the CDATA section termination marker
241 * ']]&gt;' or characters that can not be represented in the output
242 * encoding, and output the characters using numeric character references.
243 * If a CDATA section is split a warning is issued. </dd>
244 * <dt><code>false</code></dt>
245 * <dd>[
246 * required] Signal an error if a <code>CDATASection</code> contains an
247 * unrepresentable character. </dd>
248 * </dl></dd>
249 * <dt><code>"validation"</code></dt>
250 * <dd>
251 * <dl>
252 * <dt><code>true</code></dt>
253 * <dd>[
254 * optional] Use the abstract schema to validate the document as it is being
255 * serialized. If validation errors are found the error handler is notified
256 * about the error. Setting this state will also set the feature
257 * <code>use-abstract-schema</code> to <code>true</code>. </dd>
258 * <dt><code>false</code></dt>
259 * <dd>[
260 * required] (default) Don't validate the document as it is being
261 * serialized. </dd>
262 * </dl></dd>
263 * <dt><code>"expand-entity-references"</code></dt>
264 * <dd>
265 * <dl>
266 * <dt><code>true</code></dt>
267 * <dd>[
268 * optional] Expand <code>EntityReference</code> nodes when serializing. </dd>
269 * <dt>
270 * <code>false</code></dt>
271 * <dd>[required] (default) Serialize all
272 * <code>EntityReference</code> nodes as XML entity references. </dd>
273 * </dl></dd>
274 * <dt>
275 * <code>"whitespace-in-element-content"</code></dt>
276 * <dd>
277 * <dl>
278 * <dt><code>true</code></dt>
279 * <dd>[required] (
280 * default) Output all white spaces in the document. </dd>
281 * <dt><code>false</code></dt>
282 * <dd>[
283 * optional] Only output white space that is not within element content. The
284 * implementation is expected to use the
285 * <code>isWhitespaceInElementContent</code> flag on <code>Text</code> nodes
286 * to determine if a text node should be written out or not. </dd>
287 * </dl></dd>
288 * <dt>
289 * <code>"discard-default-content"</code></dt>
290 * <dd>
291 * <dl>
292 * <dt><code>true</code></dt>
293 * <dd>[required] (default
294 * ) Use whatever information available to the implementation (i.e. XML
295 * schema, DTD, the <code>specified</code> flag on <code>Attr</code> nodes,
296 * and so on) to decide what attributes and content should be serialized or
297 * not. Note that the <code>specified</code> flag on <code>Attr</code> nodes
298 * in itself is not always reliable, it is only reliable when it is set to
299 * <code>false</code> since the only case where it can be set to
300 * <code>false</code> is if the attribute was created by a Level 1
301 * implementation. </dd>
302 * <dt><code>false</code></dt>
303 * <dd>[required] Output all attributes and
304 * all content. </dd>
305 * </dl></dd>
306 * <dt><code>"format-canonical"</code></dt>
307 * <dd>
308 * <dl>
309 * <dt><code>true</code></dt>
310 * <dd>[optional]
311 * This formatting writes the document according to the rules specified in [Canonical XML].
312 * Setting this feature to true will set the feature "format-pretty-print"
313 * to false. </dd>
314 * <dt><code>false</code></dt>
315 * <dd>[required] (default) Don't canonicalize the
316 * output. </dd>
317 * </dl></dd>
318 * <dt><code>"format-pretty-print"</code></dt>
319 * <dd>
320 * <dl>
321 * <dt><code>true</code></dt>
322 * <dd>[optional]
323 * Formatting the output by adding whitespace to produce a pretty-printed,
324 * indented, human-readable form. The exact form of the transformations is
325 * not specified by this specification. Setting this feature to true will
326 * set the feature "format-canonical" to false. </dd>
327 * <dt><code>false</code></dt>
328 * <dd>[required]
329 * (default) Don't pretty-print the result. </dd>
330 * </dl></dd>
331 * </dl>
332 * <p>See also the <a href='http://www.w3.org/TR/2001/WD-DOM-Level-3-ASLS-20011025'>Document Object Model (DOM) Level 3 Abstract Schemas and Load
333 * and Save Specification</a>.
334 */
335
336#ifndef DOMWriterImpl_HEADER_GUARD_
337#define DOMWriterImpl_HEADER_GUARD_
338
339#include <xercesc/dom/DOM.hpp>
340#include <xercesc/dom/DOMWriter.hpp>
341#include <xercesc/util/XMLDOMMsg.hpp>
342#include <xercesc/util/RefHashTableOf.hpp>
343#include <xercesc/util/RefVectorOf.hpp>
344
345XERCES_CPP_NAMESPACE_BEGIN
346
347
/**
 * Concrete implementation class for the DOMWriter interface.
 *
 * Publicly it declares the feature / encoding / new-line / error-handler /
 * filter accessors and the writeNode() / writeToString() entry points.
 * The protected section exposes the helpers and data members that a derived
 * writer can use, plus the customNodeSerialize() hook it can override.
 * Copying is disabled (copy constructor and assignment are private).
 */
348class CDOM_EXPORT DOMWriterImpl:public XMemory,
349                                public DOMWriter {
350
351public:
352
353    /** @name Constructor and Destructor */
354    //@{
355
356    /**
357     * Constructor.
     *
     * @param manager memory manager for this writer; when omitted it
     *                defaults to XMLPlatformUtils::fgMemoryManager.
358     */
359    DOMWriterImpl(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
360
361    /**
362     * Destructor.
363     */
364    ~DOMWriterImpl();
365    //@}
366
367    /** @name Implementation of the abstract DOMWriter interface */
    //@{
368
    // Feature access keyed by feature name.
369    virtual bool               canSetFeature(const XMLCh* const featName
370                                           , bool               state) const;
371
372    virtual void               setFeature(const XMLCh* const featName
373                                        , bool               state);
374    virtual bool               getFeature(const XMLCh* const featName) const;
375
376    virtual void               setEncoding(const XMLCh* const encoding);
377    virtual const XMLCh*       getEncoding() const;
378
379    virtual void               setNewLine(const XMLCh* const newLine);
380    virtual const XMLCh*       getNewLine() const;
381
382    virtual void               setErrorHandler(DOMErrorHandler *errorHandler);
383    virtual DOMErrorHandler*   getErrorHandler() const;
384
385    virtual void               setFilter(DOMWriterFilter *filter);
386    virtual DOMWriterFilter*   getFilter() const;
387
    // Serializes nodeToWrite to destination. NOTE(review): per the fErrorCount
    // note further down, the bool result is presumably derived from the
    // session error count -- confirm in the .cpp.
388    virtual bool               writeNode(XMLFormatTarget* const destination
389                                       , const DOMNode         &nodeToWrite);
390    virtual void               release();
391
392    /**
393          *  Serializes nodeToWrite and returns the result as a string.
         *  The caller is responsible for the release of the returned string.
394          */
395
396    virtual XMLCh*             writeToString(const DOMNode &nodeToWrite);
397    //@}
398
399private:
400
401    /** copy ctor and assignment operator: private and unimplemented, so a writer cannot be copied */
402    DOMWriterImpl(const DOMWriterImpl&);
403    DOMWriterImpl & operator = (const DOMWriterImpl&);
404
405    /** private helpers (not reachable from derived classes) **/
406    void                          initSession(const DOMNode* const);
407    void                          processNode(const DOMNode* const);
408
409    void                          procCdataSection(const XMLCh*   const nodeValue
410                                                 , const DOMNode* const nodeToWrite
411                                                 , int level);
412
413    void                          procUnrepCharInCdataSection(const XMLCh*   const nodeValue
414                                                            , const DOMNode* const nodeToWrite
415                                                            , int level);
416
417protected:
418    /**
419     * Overridden by derived classes to extend the abilities of the standard writer;
420     * always returns false in the default implementation.
     * @param nodeToWrite the node being serialized
     * @param level       nesting level of nodeToWrite (presumably the same
     *                    level that processNode(node, level) tracks -- confirm)
421     * @return true if the method deals with nodeToWrite
422     */
423    virtual bool                          customNodeSerialize(const DOMNode* const nodeToWrite, int level);
424
425    DOMNodeFilter::FilterAction   checkFilter(const DOMNode* const) const;
426
    // featureId is an output parameter (non-const int&); presumably it receives
    // the numeric id matching featName -- confirm in the .cpp.
427    bool                          checkFeature(const XMLCh* const featName
428                                             , bool               state
429                                             , int&               featureId) const;
430
    // Two ways to report an error: with explicit message text, or with an
    // XMLDOMMsg::Codes message code.
431    bool                          reportError(const DOMNode* const    errorNode
432                                            , DOMError::ErrorSeverity errorType
433                                            , const XMLCh*   const    errorMsg);
434    bool                          reportError(const DOMNode* const    errorNode
435                                            , DOMError::ErrorSeverity errorType
436                                            , XMLDOMMsg::Codes        toEmit);
437
    // Feature access keyed by numeric feature id (a bit position in fFeatures,
    // see the inline setFeature/getFeature definitions after the class).
438    bool                          canSetFeature(const int featureId
439                                              , bool      val)     const;
440    void                          setFeature(const int featureId
441                                           , bool      val);
442    bool                          getFeature(const int featureId) const;
443
444    void                          printNewLine();
445    void                          setURCharRef();
446
447
448    void printIndent(int level) const;
449    // does the actual work for processNode while keeping track of the level
450    void processNode(const DOMNode* const nodeToWrite, int level);
451
452    void processBOM();
453
454    // -----------------------------------------------------------------------
455    //  Data members (declared in the protected section of the class)
456    //
457    //  fFeatures
    //      bit set of boolean features: bit N holds the state of the
    //      feature whose numeric featureId is N.
458    //
459    //  fEncoding
460    //      own it
461    //
462    //  fNewLine
463    //      own it
464    //
465    //  fErrorHandler
466    //      don't own it
467    //
468    //  fFilter
469    //      don't own it
470    //
471    //  fDocumentVersion
472    //      The XML Version of the document to be serialized.
473    //
474    //  fEncodingUsed (session var)
475    //      the actual encoding used in writeNode(),
476    //      it does not own any data(memory).
477    //
478    //  fNewLineUsed (session var)
479    //      the actual "end of line" sequence used in writeNode(),
480    //      it does not own any data(memory).
481    //
482    //  fFormatter (session var)
483    //      the formatter used in writeNode()
484    //
485    //  fErrorCount
486    //      the count of errors encountered in the serialization,
487    //      which neither the error handler, nor the serializer itself,
488    //      treat as fatal. And the serializer will return true/false
489    //      based on this value.
490    //
491    //  fCurrentLine
492    //      the current line. Used to track the line number the current
493    //      node begins on
494    //
    //  fNamespaceStack
    //      vector of XMLCh hash tables. NOTE(review): presumably one table
    //      of in-scope namespace declarations per open element -- confirm.
    //
    //  fMemoryManager
    //      NOTE(review): presumably the manager passed to the constructor;
    //      ownership is not visible from this header -- confirm.
    //
495    // -----------------------------------------------------------------------
496
497    int                           fFeatures;
498    XMLCh                        *fEncoding;
499    XMLCh                        *fNewLine;
500    DOMErrorHandler              *fErrorHandler;
501    DOMWriterFilter              *fFilter;
502    const XMLCh                  *fDocumentVersion;
503
504    //session vars
505    const XMLCh                  *fEncodingUsed;
506    const XMLCh                  *fNewLineUsed;
507    XMLFormatter                 *fFormatter;
508    int                           fErrorCount;
509    int                           fCurrentLine;
510
511    RefVectorOf< RefHashTableOf<XMLCh> >* fNamespaceStack;
512    MemoryManager*               fMemoryManager;
513};
514
515inline void DOMWriterImpl::setFeature(const int featureId
516                                    , bool      val)
517{
518    (val)? fFeatures |= (1<<featureId) : fFeatures &= ~(1<<featureId);
519};
520
521inline bool DOMWriterImpl::getFeature(const int featureId) const
522{
523    return ((fFeatures & ( 1<<featureId )) != 0) ? true : false;
524};
525
526inline void DOMWriterImpl::setURCharRef()
527{
528    fFormatter->setUnRepFlags(XMLFormatter::UnRep_CharRef);
529}
530
531XERCES_CPP_NAMESPACE_END
532
533#endif
Note: See TracBrowser for help on using the repository browser.