source: NonGTP/Xerces/xerces/include/xercesc/dom/DOMWriter.hpp @ 358

Revision 358, 25.0 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1#ifndef DOMWriter_HEADER_GUARD_
2#define DOMWriter_HEADER_GUARD_
3
4/*
5 * Copyright 2002,2004 The Apache Software Foundation.
6 *
7 * Licensed under the Apache License, Version 2.0 (the "License");
8 * you may not use this file except in compliance with the License.
9 * You may obtain a copy of the License at
10 *
11 *      http://www.apache.org/licenses/LICENSE-2.0
12 *
13 * Unless required by applicable law or agreed to in writing, software
14 * distributed under the License is distributed on an "AS IS" BASIS,
15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16 * See the License for the specific language governing permissions and
17 * limitations under the License.
18 */
19
20/*
21 * $Id: DOMWriter.hpp,v 1.11 2004/09/08 13:55:39 peiyongz Exp $
22 * $Log: DOMWriter.hpp,v $
23 * Revision 1.11  2004/09/08 13:55:39  peiyongz
24 * Apache License Version 2.0
25 *
26 * Revision 1.10  2003/03/07 19:59:09  tng
27 * [Bug 11692] Unimplement the hidden constructors and assignment operator to remove warnings from gcc.
28 *
29 * Revision 1.9  2002/11/27 18:46:39  tng
30 * Documentation: clarify that DOMWriter::writeToString always returns string in UTF-16
31 *
32 * Revision 1.8  2002/11/04 15:09:25  tng
33 * C++ Namespace Support.
34 *
35 * Revision 1.7  2002/08/22 15:04:57  tng
36 * Remove unused parameter variables in inline functions.
37 *
38 * Revision 1.6  2002/06/25 16:04:49  tng
39 * DOM L3: add function release().
40 *
41 * Revision 1.5  2002/06/06 20:53:07  tng
42 * Documentation Fix: Update the API Documentation for DOM headers
43 *
44 * Revision 1.4  2002/06/03 22:33:21  peiyongz
45 * DOMWriter: constants moved to XMLUni
46 *
47 * Revision 1.3  2002/05/31 20:59:40  peiyongz
48 * Add "introduced in DOM3"
49 *
50 * Revision 1.2  2002/05/30 16:25:33  tng
51 * Fix doxygen warning message.
52 *
53 * Revision 1.1  2002/05/28 22:38:55  peiyongz
54 * DOM3 Save Interface: DOMWriter/DOMWriterFilter
55 *
56 */
57
58/**
59 *
60 * DOMWriter provides an API for serializing (writing) a DOM document out in
61 * an XML document. The XML data is written to an output stream, the type of
62 * which depends on the specific language bindings in use. During
63 * serialization of XML data, namespace fixup is done when possible.
64 * <p> <code>DOMWriter</code> accepts any node type for serialization. For
65 * nodes of type <code>Document</code> or <code>Entity</code>, well formed
66 * XML will be created if possible. The serialized output for these node
67 * types is either as a Document or an External Entity, respectively, and is
68 * acceptable input for an XML parser. For all other types of nodes the
69 * serialized form is not specified, but should be something useful to a
70 * human for debugging or diagnostic purposes. Note: rigorously designing an
71 * external (source) form for stand-alone node types that don't already have
72 * one defined in the XML specification seems a bit much to take on here.
73 * <p>Within a Document or Entity being serialized, Nodes are processed as
74 * follows: Documents are written including an XML declaration and a DTD
75 * subset, if one exists in the DOM. Writing a document node serializes the
76 * entire document.  Entity nodes, when written directly by
77 * <code>writeNode</code> defined in the <code>DOMWriter</code> interface,
78 * output the entity expansion but no namespace fixup is done. The resulting
79 * output will be valid as an external entity.  Entity reference nodes are
80 * serialized as an entity reference of the form
81 * <code>"&amp;entityName;"</code> in the output. Child nodes (the
82 * expansion) of the entity reference are ignored.  CDATA sections
83 * containing content characters that can not be represented in the
84 * specified output encoding are handled according to the
85 * "split-cdata-sections" feature. If the feature is <code>true</code>, CDATA
86 * sections are split, and the unrepresentable characters are serialized as
87 * numeric character references in ordinary content. The exact position and
88 * number of splits is not specified. If the feature is <code>false</code>,
89 * unrepresentable characters in a CDATA section are reported as errors. The
90 * error is not recoverable - there is no mechanism for supplying
91 * alternative characters and continuing with the serialization. All other
92 * node types (DOMElement, DOMText, etc.) are serialized to their corresponding
93 * XML source form.
94 * <p> Within the character data of a document (outside of markup), any
95 * characters that cannot be represented directly are replaced with
96 * character references. Occurrences of '&lt;' and '&amp;' are replaced by
97 * the predefined entities &amp;lt; and &amp;amp;. The other predefined
98 * entities (&amp;gt;, &amp;apos;, etc.) are not used; these characters can be
99 * included directly. Any character that can not be represented directly in
100 * the output character encoding is serialized as a numeric character
101 * reference.
102 * <p> Attributes not containing quotes are serialized in quotes. Attributes
103 * containing quotes but no apostrophes are serialized in apostrophes
104 * (single quotes). Attributes containing both forms of quotes are
105 * serialized in quotes, with quotes within the value represented by the
106 * predefined entity &amp;quot;. Any character that can not be represented
107 * directly in the output character encoding is serialized as a numeric
108 * character reference.
109 * <p> Within markup, but outside of attributes, any occurrence of a character
110 * that cannot be represented in the output character encoding is reported
111 * as an error. An example would be serializing the element
112 * &lt;LaCañada/&gt; with the encoding="us-ascii".
113 * <p> When requested by setting the <code>normalize-characters</code> feature
114 * on <code>DOMWriter</code>, all data to be serialized, both markup and
115 * character data, is W3C Text normalized according to the rules defined in
116 * the W3C Character Model. The W3C Text normalization process affects only the data as it is being
117 * written; it does not alter the DOM's view of the document after
118 * serialization has completed.
119 * <p>Namespaces are fixed up during serialization, the serialization process
120 * will verify that namespace declarations, namespace prefixes and the
121 * namespace URIs associated with Elements and Attributes are consistent. If
122 * inconsistencies are found, the serialized form of the document will be
123 * altered to remove them. The algorithm used for doing the namespace fixup
124 * while serializing a document is a combination of the algorithms used for
125 * lookupNamespaceURI and lookupNamespacePrefix. (Editorial note: the previous
126 * paragraph is to be defined more precisely here.)
127 * <p>Any changes made affect only the namespace prefixes and declarations
128 * appearing in the serialized data. The DOM's view of the document is not
129 * altered by the serialization operation, and does not reflect any changes
130 * made to namespace declarations or prefixes in the serialized output.
131 * <p> While serializing a document the serializer will write out
132 * non-specified values (such as attributes whose <code>specified</code> is
133 * <code>false</code>) if the <code>output-default-values</code> feature is
134 * set to <code>true</code>. If the <code>output-default-values</code> flag
135 * is set to <code>false</code> and the <code>use-abstract-schema</code>
136 * feature is set to <code>true</code> the abstract schema will be used to
137 * determine if a value is specified or not, if
138 * <code>use-abstract-schema</code> is not set the <code>specified</code>
139 * flag on attribute nodes is used to determine if attribute values should
140 * be written out.
141 * <p> Ref to Core spec (1.1.9, XML namespaces, 5th paragraph) entity ref
142 * description about warning about unbound entity refs. Entity refs are
143 * always serialized as &amp;foo;, also mention this in the load part of
144 * this spec.
145 * <p> When serializing a document the DOMWriter checks to see if the document
146 * element in the document is a DOM Level 1 element or a DOM Level 2 (or
147 * higher) element (this check is done by looking at the localName of the
148 * root element). If the root element is a DOM Level 1 element then the
149 * DOMWriter will issue an error if a DOM Level 2 (or higher) element is
150 * found while serializing. Likewise if the document element is a DOM Level
151 * 2 (or higher) element and the DOMWriter sees a DOM Level 1 element an
152 * error is issued. Mixing DOM Level 1 elements with DOM Level 2 (or higher)
153 * is not supported.
154 * <p> <code>DOMWriter</code>s have a number of named features that can be
155 * queried or set. The name of <code>DOMWriter</code> features must be valid
156 * XML names. Implementation specific features (extensions) should choose an
157 * implementation dependent prefix to avoid name collisions.
158 * <p>Here is a list of properties that must be recognized by all
159 * implementations.
160 * <dl>
161 * <dt><code>"normalize-characters"</code></dt>
162 * <dd>
163 * <dl>
164 * <dt><code>true</code></dt>
165 * <dd>[
166 * optional] (default) Perform the W3C Text Normalization of the characters
167 * in document as they are written out. Only the characters being written
168 * are (potentially) altered. The DOM document itself is unchanged. </dd>
169 * <dt>
170 * <code>false</code></dt>
171 * <dd>[required] do not perform character normalization. </dd>
172 * </dl></dd>
173 * <dt>
174 * <code>"split-cdata-sections"</code></dt>
175 * <dd>
176 * <dl>
177 * <dt><code>true</code></dt>
178 * <dd>[required] (default)
179 * Split CDATA sections containing the CDATA section termination marker
180 * ']]&gt;' or characters that can not be represented in the output
181 * encoding, and output the characters using numeric character references.
182 * If a CDATA section is split a warning is issued. </dd>
183 * <dt><code>false</code></dt>
184 * <dd>[
185 * required] Signal an error if a <code>CDATASection</code> contains an
186 * unrepresentable character. </dd>
187 * </dl></dd>
188 * <dt><code>"validation"</code></dt>
189 * <dd>
190 * <dl>
191 * <dt><code>true</code></dt>
192 * <dd>[
193 * optional] Use the abstract schema to validate the document as it is being
194 * serialized. If validation errors are found the error handler is notified
195 * about the error. Setting this state will also set the feature
196 * <code>use-abstract-schema</code> to <code>true</code>. </dd>
197 * <dt><code>false</code></dt>
198 * <dd>[
199 * required] (default) Don't validate the document as it is being
200 * serialized. </dd>
201 * </dl></dd>
202 * <dt><code>"expand-entity-references"</code></dt>
203 * <dd>
204 * <dl>
205 * <dt><code>true</code></dt>
206 * <dd>[
207 * optional] Expand <code>EntityReference</code> nodes when serializing. </dd>
208 * <dt>
209 * <code>false</code></dt>
210 * <dd>[required] (default) Serialize all
211 * <code>EntityReference</code> nodes as XML entity references. </dd>
212 * </dl></dd>
213 * <dt>
214 * <code>"whitespace-in-element-content"</code></dt>
215 * <dd>
216 * <dl>
217 * <dt><code>true</code></dt>
218 * <dd>[required] (
219 * default) Output all white spaces in the document. </dd>
220 * <dt><code>false</code></dt>
221 * <dd>[
222 * optional] Only output white space that is not within element content. The
223 * implementation is expected to use the
224 * <code>isWhitespaceInElementContent</code> flag on <code>Text</code> nodes
225 * to determine if a text node should be written out or not. </dd>
226 * </dl></dd>
227 * <dt>
228 * <code>"discard-default-content"</code></dt>
229 * <dd>
230 * <dl>
231 * <dt><code>true</code></dt>
232 * <dd>[required] (default
233 * ) Use whatever information available to the implementation (i.e. XML
234 * schema, DTD, the <code>specified</code> flag on <code>Attr</code> nodes,
235 * and so on) to decide what attributes and content should be serialized or
236 * not. Note that the <code>specified</code> flag on <code>Attr</code> nodes
237 * in itself is not always reliable, it is only reliable when it is set to
238 * <code>false</code> since the only case where it can be set to
239 * <code>false</code> is if the attribute was created by a Level 1
240 * implementation. </dd>
241 * <dt><code>false</code></dt>
242 * <dd>[required] Output all attributes and
243 * all content. </dd>
244 * </dl></dd>
245 * <dt><code>"format-canonical"</code></dt>
246 * <dd>
247 * <dl>
248 * <dt><code>true</code></dt>
249 * <dd>[optional]
250 * This formatting writes the document according to the rules specified in Canonical XML.
251 * Setting this feature to true will set the feature "format-pretty-print"
252 * to false. </dd>
253 * <dt><code>false</code></dt>
254 * <dd>[required] (default) Don't canonicalize the
255 * output. </dd>
256 * </dl></dd>
257 * <dt><code>"format-pretty-print"</code></dt>
258 * <dd>
259 * <dl>
260 * <dt><code>true</code></dt>
261 * <dd>[optional]
262 * Formatting the output by adding whitespace to produce a pretty-printed,
263 * indented, human-readable form. The exact form of the transformations is
264 * not specified by this specification. Setting this feature to true will
265 * set the feature "format-canonical" to false. </dd>
266 * <dt><code>false</code></dt>
267 * <dd>[required]
268 * (default) Don't pretty-print the result. </dd>
269 * </dl></dd>
270 * </dl>
271 * <p>See also the <a href='http://www.w3.org/TR/2002/WD-DOM-Level-3-ASLS-20020409'>Document Object Model (DOM) Level 3 Abstract Schemas and Load
272 * and Save Specification</a>.
273 *
274 * @since DOM Level 3
275 */
276
277
278#include <xercesc/dom/DOMNode.hpp>
279#include <xercesc/dom/DOMWriterFilter.hpp>
280#include <xercesc/dom/DOMErrorHandler.hpp>
281#include <xercesc/framework/XMLFormatter.hpp>
282
283XERCES_CPP_NAMESPACE_BEGIN
284
// Abstract serialization interface: every operation declared below is pure
// virtual, so a concrete writer must derive from DOMWriter and implement them.
285class CDOM_EXPORT DOMWriter {
286protected :
287    // -----------------------------------------------------------------------
288    //  Hidden constructors (protected: only derived classes can construct)
289    // -----------------------------------------------------------------------
290    /** @name Hidden constructors */
291    //@{   
292    DOMWriter() {};
293    //@}
294private:       
295    // -----------------------------------------------------------------------
296    // Unimplemented constructors and operators: declared private with no
    // definition in this header. NOTE(review): presumably to prevent copying
    // of writers - confirm no definition exists elsewhere.
297    // -----------------------------------------------------------------------
298    /** @name Unimplemented constructors and operators */
299    //@{
300    DOMWriter(const DOMWriter &);
301    DOMWriter & operator = (const DOMWriter &);
302    //@}
303
304
305public:
306    // -----------------------------------------------------------------------
307    //  All constructors are hidden, just the destructor is available
308    // -----------------------------------------------------------------------
309    /** @name Destructor */
310    //@{
311    /**
312     * Destructor. Virtual, so a derived writer can be destroyed through a
313     * pointer to DOMWriter.
314     */
315    virtual ~DOMWriter() {};
316    //@}
317
318    // -----------------------------------------------------------------------
319    //  Virtual DOMWriter interface
320    // -----------------------------------------------------------------------
321    /** @name Functions introduced in DOM Level 3 */
322    //@{
323    // -----------------------------------------------------------------------
324    //  Feature methods
325    // -----------------------------------------------------------------------
326    /**
327     * Query whether setting a feature to a specific value is supported.
328     * <br>The feature name has the same form as a DOM hasFeature string.
329     *
330     *  <p><b>"Experimental - subject to change"</b></p>
331     *
332     * @param featName The feature name, which is a DOM has-feature style string.
333     * @param state The requested state of the feature (<code>true</code> or
334     *   <code>false</code>).
335     * @return <code>true</code> if the feature could be successfully set to
336     *   the specified value, or <code>false</code> if the feature is not
337     *   recognized or the requested value is not supported. The value of
338     *   the feature itself is not changed.
339     * @since DOM Level 3
340     */
341    virtual bool           canSetFeature(const XMLCh* const featName
342                                       , bool               state) const = 0;
343    /**
344     * Set the state of a feature.
345     * <br>The feature name has the same form as a DOM hasFeature string.
346     * <br>It is possible for a <code>DOMWriter</code> to recognize a feature
347     * name but to be unable to set its value.
348     *
349     *  <p><b>"Experimental - subject to change"</b></p>
350     *
351     * @param featName The feature name.
352     * @param state The requested state of the feature (<code>true</code> or
353     *   <code>false</code>).
354     * @exception DOMException
355     *   Raise a NOT_SUPPORTED_ERR exception when the <code>DOMWriter</code>
356     *   recognizes the feature name but cannot set the requested value.
357     *   <br>Raise a NOT_FOUND_ERR when the <code>DOMWriter</code> does not
358     *   recognize the feature name.
359     * @see   getFeature
360     * @since DOM Level 3
361     */
362    virtual void            setFeature(const XMLCh* const featName
363                                     , bool               state) = 0;
364
365    /**
366     * Look up the value of a feature.
367     * <br>The feature name has the same form as a DOM hasFeature string.
368     * @param featName The feature name, which is a string with DOM has-feature
369     *   syntax.
370     * @return The current state of the feature (<code>true</code> or
371     *   <code>false</code>).
372     * @exception DOMException
373     *   Raise a NOT_FOUND_ERR when the <code>DOMWriter</code> does not
374     *   recognize the feature name.
375     *
376     *  <p><b>"Experimental - subject to change"</b></p>
377     *
378     * @see   setFeature
379     * @since DOM Level 3
380     */
381    virtual bool               getFeature(const XMLCh* const featName) const = 0;
382
383    // -----------------------------------------------------------------------
384    //  Setter methods
385    // -----------------------------------------------------------------------
386    /**
387     * The character encoding in which the output will be written.
388     * <br> The encoding to use when writing is determined as follows: If the
389     * encoding attribute has been set, that value will be used. If the
390     * encoding attribute is <code>null</code> or empty, but the item to be
391     * written includes an encoding declaration, that value will be used. If
392     * neither of the above provides an encoding name, a default encoding of
393     * "UTF-8" will be used.
394     * <br>The default value is <code>null</code>.
395     *
396     *  <p><b>"Experimental - subject to change"</b></p>
397     *
398     * @param encoding    The character encoding in which the output will be written.
399     * @see   getEncoding
400     * @since DOM Level 3
401     */
402    virtual void           setEncoding(const XMLCh* const encoding) = 0;
403
404    /**
405     * The end-of-line sequence of characters to be used in the XML being
406     * written out. The only permitted values are these:
407     * <dl>
408     * <dt><code>null</code></dt>
409     * <dd>
410     * Use a default end-of-line sequence. DOM implementations should choose
411     * the default to match the usual convention for text files in the
412     * environment being used. Implementations must choose a default
413     * sequence that matches one of those allowed by XML 1.0 section 2.11
414     * "End-of-Line Handling". </dd>
415     * <dt>CR</dt>
416     * <dd>The carriage-return character (#xD).</dd>
417     * <dt>CR-LF</dt>
418     * <dd> The
419     * carriage-return and line-feed characters (#xD #xA). </dd>
420     * <dt>LF</dt>
421     * <dd> The line-feed
422     * character (#xA). </dd>
423     * </dl>
424     * <br>The default value for this attribute is <code>null</code>.
425     *
426     *  <p><b>"Experimental - subject to change"</b></p>
427     *
428     * @param newLine      The end-of-line sequence of characters to be used.
429     * @see   getNewLine
430     * @since DOM Level 3
431     */
432    virtual void          setNewLine(const XMLCh* const newLine) = 0;
433
434    /**
435     * The error handler that will receive error notifications during
436     * serialization. The node where the error occurred is passed to this
437     * error handler, any modification to nodes from within an error
438     * callback should be avoided since this will result in undefined,
439     * implementation dependent behavior.
440     *
441     *  <p><b>"Experimental - subject to change"</b></p>
442     *
443     * @param errorHandler The error handler to be used.
444     * @see   getErrorHandler
445     * @since DOM Level 3
446     */
447    virtual void         setErrorHandler(DOMErrorHandler *errorHandler) = 0;
448
449    /**
450     * When the application provides a filter, the serializer will call out
451     * to the filter before serializing each Node. Attribute nodes are never
452     * passed to the filter. The filter implementation can choose to remove
453     * the node from the stream or to terminate the serialization early.
454     *
455     *  <p><b>"Experimental - subject to change"</b></p>
456     *
457     * @param filter       The writer filter to be used.
458     * @see   getFilter
459     * @since DOM Level 3
460     */
461    virtual void         setFilter(DOMWriterFilter *filter) = 0;
462
463    // -----------------------------------------------------------------------
464    //  Getter methods
465    // -----------------------------------------------------------------------
466    /**
467     * Return the character encoding in which the output will be written.
468     *
469     *  <p><b>"Experimental - subject to change"</b></p>
470     *
471     * @return The character encoding used.
472     * @see   setEncoding
473     * @since DOM Level 3
474     */
475     virtual const XMLCh*       getEncoding() const = 0;
476
477    /**
478     * Return the end-of-line sequence of characters to be used in the XML being
479     * written out.
480     *
481     *  <p><b>"Experimental - subject to change"</b></p>
482     *
483     * @return             The end-of-line sequence of characters to be used.
484     * @see   setNewLine
485     * @since DOM Level 3
486     */
487     virtual const XMLCh*       getNewLine() const = 0;
488
489    /**
490     * Return the error handler that will receive error notifications during
491     * serialization.
492     *
493     *  <p><b>"Experimental - subject to change"</b></p>
494     *
495     * @return             The error handler to be used.
496     * @see   setErrorHandler
497     * @since DOM Level 3
498     */
499     virtual DOMErrorHandler*   getErrorHandler() const = 0;
500
501    /**
502     * Return the WriterFilter used.
503     *
504     *  <p><b>"Experimental - subject to change"</b></p>
505     *
506     * @return             The writer filter used.
507     * @see   setFilter
508     * @since DOM Level 3
509     */
510     virtual DOMWriterFilter*   getFilter() const = 0;
511
512    // -----------------------------------------------------------------------
513    //  Write methods
514    // -----------------------------------------------------------------------
515    /**
516     * Write out the specified node as described above in the description of
517     * <code>DOMWriter</code>. Writing a Document or Entity node produces a
518     * serialized form that is well formed XML. Writing other node types
519     * produces a fragment of text in a form that is not fully defined by
520     * this document, but that should be useful to a human for debugging or
521     * diagnostic purposes.
522     *
523     *  <p><b>"Experimental - subject to change"</b></p>
524     *
525     * @param destination The destination for the data to be written.
526     * @param nodeToWrite The <code>Document</code> or <code>Entity</code> node to
527     *   be written. For other node types, something sensible should be
528     *   written, but the exact serialized form is not specified.
529     * @return  Returns <code>true</code> if <code>nodeToWrite</code> was
530     *   successfully serialized and <code>false</code> in case a failure
531     *   occurred and the failure wasn't canceled by the error handler.
532     * @exception DOMSystemException
533     *   This exception will be raised in response to any sort of IO or system
534     *   error that occurs while writing to the destination. It may wrap an
535     *   underlying system exception.
536     * @since DOM Level 3
537     */
538    virtual bool       writeNode(XMLFormatTarget* const destination
539                               , const DOMNode         &nodeToWrite) = 0;
540
541    /**
542     * Serialize the specified node as described above in the description of
543     * <code>DOMWriter</code>. The result of serializing the node is
544     * returned as a string. Writing a Document or Entity node produces a
545     * serialized form that is well formed XML. Writing other node types
546     * produces a fragment of text in a form that is not fully defined by
547     * this document, but that should be useful to a human for debugging or
548     * diagnostic purposes.
549     *
550     *  <p><b>"Experimental - subject to change"</b></p>
551     *
552     * @param nodeToWrite  The node to be written.
553     * @return  Returns the serialized data, or <code>null</code> in case a
554     *   failure occurred and the failure wasn't canceled by the error
555     *   handler.   The returned string is always in UTF-16.
556     *   The encoding information available in DOMWriter is ignored in writeToString().
     *   NOTE(review): this header does not state who owns (and must free)
     *   the returned buffer - confirm against the implementation.
557     * @since DOM Level 3
558     */
559    virtual XMLCh*     writeToString(const DOMNode &nodeToWrite) = 0;
560
561    //@}
562
563    // -----------------------------------------------------------------------
564    //  Non-standard Extension
565    // -----------------------------------------------------------------------
566    /** @name Non-standard Extension */
567    //@{
568    /**
569     * Called to indicate that this Writer is no longer in use
570     * and that the implementation may relinquish any resources associated with it.
571     *
572     * Access to a released object will lead to unexpected results.
573     */
574    virtual void              release() = 0;
575    //@}
576
577
578};
579
580XERCES_CPP_NAMESPACE_END
581
582#endif
Note: See TracBrowser for help on using the repository browser.