source: NonGTP/Xerces/xerces/samples/DOMPrint/DOMPrint.cpp @ 358

Revision 358, 20.0 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 2002,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Id: DOMPrint.cpp,v 1.61 2004/09/08 13:55:31 peiyongz Exp $
19 */
20
21// ---------------------------------------------------------------------------
22//  This sample program invokes the XercesDOMParser to build a DOM tree for
23//  the specified input file. It then invokes DOMWriter::writeNode() to
24//  serialize the resultant DOM tree back to XML stream.
25//
26//  Note:
27//  Application needs to provide its own implementation of
28//                 DOMErrorHandler (in this sample, the DOMPrintErrorHandler),
29//                 if it would like to receive notification from the serializer
30//                 in the case any error occurs during the serialization.
31//
32//  Application needs to provide its own implementation of
33//                 DOMWriterFilter (in this sample, the DOMPrintFilter),
34//                 if it would like to filter out certain part of the DOM
35//                 representation, but must be aware that this may render the
36//                 resultant XML stream invalid.
37//
38//  Application may choose any combination of characters as the
39//                 end of line sequence to be used in the resultant XML stream,
40//                 but must be aware that this may render the resultant XML
41//                 stream ill formed.
42//
43//  Application may choose a particular encoding name in which
44//                 the output XML stream would be, but must be aware that if
45//                 characters, unrepresentable in the encoding specified, appearing
46//                 in markups, may force the serializer to terminate serialization
47//                 prematurely, and thus no complete serialization would be done.
48//
49//  Application shall query the serializer first, before setting any
50//           feature/mode(true, false), or be ready to catch exception if this
51//           feature/mode is not supported by the serializer.
52//
53//  Application needs to clean up the filter, error handler and
54//                 format target objects created for the serialization.
55//
56//   Limitations:
57//      1.  The encoding="xxx" clause in the XML header should reflect
58//          the system local code page, but does not.
59//      2.  Cases where the XML data contains characters that can not
60//          be represented in the system local code page are not handled.
61//
62// ---------------------------------------------------------------------------
63
64
65// ---------------------------------------------------------------------------
66//  Includes
67// ---------------------------------------------------------------------------
68#include <xercesc/util/PlatformUtils.hpp>
69
70#include <xercesc/dom/DOM.hpp>
71#include <xercesc/dom/DOMImplementation.hpp>
72#include <xercesc/dom/DOMImplementationLS.hpp>
73#include <xercesc/dom/DOMWriter.hpp>
74
75#include <xercesc/framework/StdOutFormatTarget.hpp>
76#include <xercesc/framework/LocalFileFormatTarget.hpp>
77#include <xercesc/parsers/XercesDOMParser.hpp>
78#include <xercesc/util/XMLUni.hpp>
79
80#include "DOMTreeErrorReporter.hpp"
81#include "DOMPrintFilter.hpp"
82#include "DOMPrintErrorHandler.hpp"
83#include <xercesc/util/OutOfMemoryException.hpp>
84
85#include <string.h>
86#include <stdlib.h>
87
88// ---------------------------------------------------------------------------
89//  Local data
90//
91//  gXmlFile
92//      The path to the file to parse. Set via command line.
93//
94//  gDoNamespaces
95//      Indicates whether namespace processing should be done.
96//
97//  gDoSchema
98//      Indicates whether schema processing should be done.
99//
100//  gSchemaFullChecking
101//      Indicates whether full schema constraint checking should be done.
102//
103//  gDoCreate
104//      Indicates whether entity reference nodes need to be created or not
105//      Defaults to false
106//
107//  gOutputEncoding
108//      The encoding we are to output in. If not set on the command line,
109//      then it defaults to the encoding of the input XML file.
110//
111//  gSplitCdataSections
112//      Indicates whether split-cdata-sections is to be enabled or not.
113//
114//  gDiscardDefaultContent
115//      Indicates whether default content is discarded or not.
116//
117//  gUseFilter
118//      Indicates if user wants to plug in the DOMPrintFilter.
119//
120//  gValScheme
121//      Indicates what validation scheme to use. It defaults to 'auto', but
122//      can be set via the -v= command.
123//
124// ---------------------------------------------------------------------------
125static char*                    gXmlFile               = 0;
126static bool                     gDoNamespaces          = false;
127static bool                     gDoSchema              = false;
128static bool                     gSchemaFullChecking    = false;
129static bool                     gDoCreate              = false;
130
131static char*                    goutputfile            = 0;
132// options for DOMWriter's features
133static XMLCh*                   gOutputEncoding        = 0;
134
135static bool                     gSplitCdataSections    = true;
136static bool                     gDiscardDefaultContent = true;
137static bool                     gUseFilter             = false;
138static bool                     gFormatPrettyPrint     = false;
139static bool                     gWriteBOM              = false;
140
141static XercesDOMParser::ValSchemes    gValScheme       = XercesDOMParser::Val_Auto;
142
143
144//      Prototypes for internally used functions
145void usage();
146
147
148// ---------------------------------------------------------------------------
149//
150//  Usage()
151//
152// ---------------------------------------------------------------------------
153void usage()
154{
155    XERCES_STD_QUALIFIER cout << "\nUsage:\n"
156            "    DOMPrint [options] <XML file>\n\n"
157            "This program invokes the DOM parser, and builds the DOM tree.\n"
158            "It then asks the DOMWriter to serialize the DOM tree.\n"
159            "Options:\n"
160            "    -e          create entity reference nodes. Default is no expansion.\n"
161            "    -v=xxx      Validation scheme [always | never | auto*].\n"
162            "    -n          Enable namespace processing. Default is off.\n"
163            "    -s          Enable schema processing. Default is off.\n"
164            "    -f          Enable full schema constraint checking. Defaults is off.\n"
165            "    -wenc=XXX   Use a particular encoding for output. Default is\n"
166            "                the same encoding as the input XML file. UTF-8 if\n"
167            "                input XML file has not XML declaration.\n"
168            "    -wfile=xxx  Write to a file instead of stdout.\n"
169            "    -wscs=xxx   Enable/Disable split-cdata-sections.      Default on\n"
170            "    -wddc=xxx   Enable/Disable discard-default-content.   Default on\n"
171            "    -wflt=xxx   Enable/Disable filtering.                 Default off\n"
172            "    -wfpp=xxx   Enable/Disable format-pretty-print.       Default off\n"
173            "    -wbom=xxx   Enable/Disable write Byte-Order-Mark      Default off\n"
174            "    -?          Show this help.\n\n"
175            "  * = Default if not provided explicitly.\n\n"
176            "The parser has intrinsic support for the following encodings:\n"
177            "    UTF-8, USASCII, ISO8859-1, UTF-16[BL]E, UCS-4[BL]E,\n"
178            "    WINDOWS-1252, IBM1140, IBM037, IBM1047.\n"
179          <<  XERCES_STD_QUALIFIER endl;
180}
181
182// ---------------------------------------------------------------------------
183//
184//  main
185//
186// ---------------------------------------------------------------------------
187int main(int argC, char* argV[])
188{
189    int retval = 0;
190
191    // Initialize the XML4C2 system
192    try
193    {
194        XMLPlatformUtils::Initialize();
195    }
196
197    catch(const XMLException &toCatch)
198    {
199        XERCES_STD_QUALIFIER cerr << "Error during Xerces-c Initialization.\n"
200             << "  Exception message:"
201             << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
202        return 1;
203    }
204
205    // Check command line and extract arguments.
206    if (argC < 2)
207    {
208        usage();
209        XMLPlatformUtils::Terminate();
210        return 1;
211    }
212
213    // See if non validating dom parser configuration is requested.
214    int parmInd;
215    for (parmInd = 1; parmInd < argC; parmInd++)
216    {
217        // Break out on first parm not starting with a dash
218        if (argV[parmInd][0] != '-')
219            break;
220
221        // Watch for special case help request
222        if (!strcmp(argV[parmInd], "-?"))
223        {
224            usage();
225            XMLPlatformUtils::Terminate();
226            return 2;
227        }
228         else if (!strncmp(argV[parmInd], "-v=", 3)
229              ||  !strncmp(argV[parmInd], "-V=", 3))
230        {
231            const char* const parm = &argV[parmInd][3];
232
233            if (!strcmp(parm, "never"))
234                gValScheme = XercesDOMParser::Val_Never;
235            else if (!strcmp(parm, "auto"))
236                gValScheme = XercesDOMParser::Val_Auto;
237            else if (!strcmp(parm, "always"))
238                gValScheme = XercesDOMParser::Val_Always;
239            else
240            {
241                XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
242                XMLPlatformUtils::Terminate();
243                return 2;
244            }
245        }
246         else if (!strcmp(argV[parmInd], "-n")
247              ||  !strcmp(argV[parmInd], "-N"))
248        {
249            gDoNamespaces = true;
250        }
251         else if (!strcmp(argV[parmInd], "-s")
252              ||  !strcmp(argV[parmInd], "-S"))
253        {
254            gDoSchema = true;
255        }
256         else if (!strcmp(argV[parmInd], "-f")
257              ||  !strcmp(argV[parmInd], "-F"))
258        {
259            gSchemaFullChecking = true;
260        }
261         else if (!strcmp(argV[parmInd], "-e")
262              ||  !strcmp(argV[parmInd], "-E"))
263        {
264            gDoCreate = true;
265        }
266         else if (!strncmp(argV[parmInd], "-wenc=", 6))
267        {
268             // Get out the encoding name
269             gOutputEncoding = XMLString::transcode( &(argV[parmInd][6]) );
270        }
271         else if (!strncmp(argV[parmInd], "-wfile=", 7))
272        {
273             goutputfile =  &(argV[parmInd][7]);
274        }
275         else if (!strncmp(argV[parmInd], "-wddc=", 6))
276        {
277            const char* const parm = &argV[parmInd][6];
278
279            if (!strcmp(parm, "on"))
280                                gDiscardDefaultContent = true;
281            else if (!strcmp(parm, "off"))
282                                gDiscardDefaultContent = false;
283            else
284            {
285                XERCES_STD_QUALIFIER cerr << "Unknown -wddc= value: " << parm << XERCES_STD_QUALIFIER endl;
286                XMLPlatformUtils::Terminate();
287                return 2;
288            }
289
290        }
291         else if (!strncmp(argV[parmInd], "-wscs=", 6))
292        {
293            const char* const parm = &argV[parmInd][6];
294
295            if (!strcmp(parm, "on"))
296                                gSplitCdataSections = true;
297                        else if (!strcmp(parm, "off"))
298                                gSplitCdataSections = false;
299            else
300            {
301                XERCES_STD_QUALIFIER cerr << "Unknown -wscs= value: " << parm << XERCES_STD_QUALIFIER endl;
302                XMLPlatformUtils::Terminate();
303                return 2;
304            }
305        }
306         else if (!strncmp(argV[parmInd], "-wflt=", 6))
307        {
308            const char* const parm = &argV[parmInd][6];
309
310            if (!strcmp(parm, "on"))
311                                gUseFilter = true;
312                        else if (!strcmp(parm, "off"))
313                                gUseFilter = false;
314            else
315            {
316                XERCES_STD_QUALIFIER cerr << "Unknown -wflt= value: " << parm << XERCES_STD_QUALIFIER endl;
317                XMLPlatformUtils::Terminate();
318                return 2;
319            }
320        }
321         else if (!strncmp(argV[parmInd], "-wfpp=", 6))
322        {
323            const char* const parm = &argV[parmInd][6];
324
325            if (!strcmp(parm, "on"))
326                                gFormatPrettyPrint = true;
327                        else if (!strcmp(parm, "off"))
328                                gFormatPrettyPrint = false;
329            else
330            {
331                XERCES_STD_QUALIFIER cerr << "Unknown -wfpp= value: " << parm << XERCES_STD_QUALIFIER endl;
332                XMLPlatformUtils::Terminate();
333                return 2;
334            }
335        }
336         else if (!strncmp(argV[parmInd], "-wbom=", 6))
337        {
338            const char* const parm = &argV[parmInd][6];
339
340            if (!strcmp(parm, "on"))
341                gWriteBOM = true;
342            else if (!strcmp(parm, "off"))
343                gWriteBOM = false;
344            else
345            {
346                XERCES_STD_QUALIFIER cerr << "Unknown -wbom= value: " << parm << XERCES_STD_QUALIFIER endl;
347                XMLPlatformUtils::Terminate();
348                return 2;
349            }
350        }
351         else
352        {
353            XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[parmInd]
354                 << "', ignoring it.\n" << XERCES_STD_QUALIFIER endl;
355        }
356    }
357
358    //
359    //  And now we have to have only one parameter left and it must be
360    //  the file name.
361    //
362    if (parmInd + 1 != argC)
363    {
364        usage();
365        XMLPlatformUtils::Terminate();
366        return 1;
367    }
368    gXmlFile = argV[parmInd];
369
370    //
371    //  Create our parser, then attach an error handler to the parser.
372    //  The parser will call back to methods of the ErrorHandler if it
373    //  discovers errors during the course of parsing the XML document.
374    //
375    XercesDOMParser *parser = new XercesDOMParser;
376    parser->setValidationScheme(gValScheme);
377    parser->setDoNamespaces(gDoNamespaces);
378    parser->setDoSchema(gDoSchema);
379    parser->setValidationSchemaFullChecking(gSchemaFullChecking);
380    parser->setCreateEntityReferenceNodes(gDoCreate);
381
382    DOMTreeErrorReporter *errReporter = new DOMTreeErrorReporter();
383    parser->setErrorHandler(errReporter);
384
385    //
386    //  Parse the XML file, catching any XML exceptions that might propogate
387    //  out of it.
388    //
389    bool errorsOccured = false;
390    try
391    {
392        parser->parse(gXmlFile);
393    }
394    catch (const OutOfMemoryException&)
395    {
396        XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
397        errorsOccured = true;
398    }
399    catch (const XMLException& e)
400    {
401        XERCES_STD_QUALIFIER cerr << "An error occurred during parsing\n   Message: "
402             << StrX(e.getMessage()) << XERCES_STD_QUALIFIER endl;
403        errorsOccured = true;
404    }
405
406    catch (const DOMException& e)
407    {
408        const unsigned int maxChars = 2047;
409        XMLCh errText[maxChars + 1];
410
411        XERCES_STD_QUALIFIER cerr << "\nDOM Error during parsing: '" << gXmlFile << "'\n"
412             << "DOMException code is:  " << e.code << XERCES_STD_QUALIFIER endl;
413
414        if (DOMImplementation::loadDOMExceptionMsg(e.code, errText, maxChars))
415             XERCES_STD_QUALIFIER cerr << "Message is: " << StrX(errText) << XERCES_STD_QUALIFIER endl;
416
417        errorsOccured = true;
418    }
419
420    catch (...)
421    {
422        XERCES_STD_QUALIFIER cerr << "An error occurred during parsing\n " << XERCES_STD_QUALIFIER endl;
423        errorsOccured = true;
424    }
425
426    // If the parse was successful, output the document data from the DOM tree
427    if (!errorsOccured && !errReporter->getSawErrors())
428    {
429        DOMPrintFilter   *myFilter = 0;
430
431        try
432        {
433            // get a serializer, an instance of DOMWriter
434            XMLCh tempStr[100];
435            XMLString::transcode("LS", tempStr, 99);
436            DOMImplementation *impl          = DOMImplementationRegistry::getDOMImplementation(tempStr);
437            DOMWriter         *theSerializer = ((DOMImplementationLS*)impl)->createDOMWriter();
438
439            // set user specified output encoding
440            theSerializer->setEncoding(gOutputEncoding);
441
442            // plug in user's own filter
443            if (gUseFilter)
444            {
445                // even we say to show attribute, but the DOMWriter
446                // will not show attribute nodes to the filter as
447                // the specs explicitly says that DOMWriter shall
448                // NOT show attributes to DOMWriterFilter.
449                //
450                // so DOMNodeFilter::SHOW_ATTRIBUTE has no effect.
451                // same DOMNodeFilter::SHOW_DOCUMENT_TYPE, no effect.
452                //
453                myFilter = new DOMPrintFilter(DOMNodeFilter::SHOW_ELEMENT   |
454                                              DOMNodeFilter::SHOW_ATTRIBUTE |
455                                              DOMNodeFilter::SHOW_DOCUMENT_TYPE);
456                theSerializer->setFilter(myFilter);
457            }
458
459            // plug in user's own error handler
460            DOMErrorHandler *myErrorHandler = new DOMPrintErrorHandler();
461            theSerializer->setErrorHandler(myErrorHandler);
462
463            // set feature if the serializer supports the feature/mode
464            if (theSerializer->canSetFeature(XMLUni::fgDOMWRTSplitCdataSections, gSplitCdataSections))
465                theSerializer->setFeature(XMLUni::fgDOMWRTSplitCdataSections, gSplitCdataSections);
466
467            if (theSerializer->canSetFeature(XMLUni::fgDOMWRTDiscardDefaultContent, gDiscardDefaultContent))
468                theSerializer->setFeature(XMLUni::fgDOMWRTDiscardDefaultContent, gDiscardDefaultContent);
469
470            if (theSerializer->canSetFeature(XMLUni::fgDOMWRTFormatPrettyPrint, gFormatPrettyPrint))
471                theSerializer->setFeature(XMLUni::fgDOMWRTFormatPrettyPrint, gFormatPrettyPrint);
472
473            if (theSerializer->canSetFeature(XMLUni::fgDOMWRTBOM, gWriteBOM))
474                theSerializer->setFeature(XMLUni::fgDOMWRTBOM, gWriteBOM);
475
476            //
477            // Plug in a format target to receive the resultant
478            // XML stream from the serializer.
479            //
480            // StdOutFormatTarget prints the resultant XML stream
481            // to stdout once it receives any thing from the serializer.
482            //
483            XMLFormatTarget *myFormTarget;
484            if (goutputfile)
485                myFormTarget = new LocalFileFormatTarget(goutputfile);
486            else
487                myFormTarget = new StdOutFormatTarget();
488
489            // get the DOM representation
490            DOMNode                     *doc = parser->getDocument();
491
492            //
493            // do the serialization through DOMWriter::writeNode();
494            //
495            theSerializer->writeNode(myFormTarget, *doc);
496
497            delete theSerializer;
498
499            //
500            // Filter, formatTarget and error handler
501            // are NOT owned by the serializer.
502            //
503            delete myFormTarget;
504            delete myErrorHandler;
505
506            if (gUseFilter)
507                delete myFilter;
508
509        }
510        catch (const OutOfMemoryException&)
511        {
512            XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
513            retval = 5;
514        }
515        catch (XMLException& e)
516        {
517            XERCES_STD_QUALIFIER cerr << "An error occurred during creation of output transcoder. Msg is:"
518                << XERCES_STD_QUALIFIER endl
519                << StrX(e.getMessage()) << XERCES_STD_QUALIFIER endl;
520            retval = 4;
521        }
522
523    }
524    else
525        retval = 4;
526
527    //
528    //  Clean up the error handler. The parser does not adopt handlers
529    //  since they could be many objects or one object installed for multiple
530    //  handlers.
531    //
532    delete errReporter;
533
534    //
535    //  Delete the parser itself.  Must be done prior to calling Terminate, below.
536    //
537    delete parser;
538
539    // And call the termination method
540    XMLPlatformUtils::Terminate();
541
542    XMLString::release(&gOutputEncoding);
543
544    return retval;
545}
546
Note: See TracBrowser for help on using the repository browser.