source: NonGTP/Xerces/xerces/samples/DOMCount/DOMCount.cpp @ 358

Revision 358, 16.1 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2002,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Id: DOMCount.cpp,v 1.39 2004/09/08 13:55:31 peiyongz Exp $
19 */
20
21// ---------------------------------------------------------------------------
22//  Includes
23// ---------------------------------------------------------------------------
24#include <xercesc/util/PlatformUtils.hpp>
25#include <xercesc/parsers/AbstractDOMParser.hpp>
26#include <xercesc/dom/DOMImplementation.hpp>
27#include <xercesc/dom/DOMImplementationLS.hpp>
28#include <xercesc/dom/DOMImplementationRegistry.hpp>
29#include <xercesc/dom/DOMBuilder.hpp>
30#include <xercesc/dom/DOMException.hpp>
31#include <xercesc/dom/DOMDocument.hpp>
32#include <xercesc/dom/DOMNodeList.hpp>
33#include <xercesc/dom/DOMError.hpp>
34#include <xercesc/dom/DOMLocator.hpp>
35#include <xercesc/dom/DOMNamedNodeMap.hpp>
36#include <xercesc/dom/DOMAttr.hpp>
37#include "DOMCount.hpp"
38#include <string.h>
39#include <stdlib.h>
40
41#if defined(XERCES_NEW_IOSTREAMS)
42#include <fstream>
43#else
44#include <fstream.h>
45#endif
46
47
48// ---------------------------------------------------------------------------
//  This is a simple program which invokes the DOMBuilder to build a DOM
//  tree for the specified input file (or for each file named in a list
//  file). It then walks the tree and counts the number of elements. The
//  element count is then printed.
52// ---------------------------------------------------------------------------
53static void usage()
54{
55    XERCES_STD_QUALIFIER cout << "\nUsage:\n"
56            "    DOMCount [options] <XML file | List file>\n\n"
57            "This program invokes the DOMBuilder, builds the DOM tree,\n"
58            "and then prints the number of elements found in each XML file.\n\n"
59            "Options:\n"
60            "    -l          Indicate the input file is a List File that has a list of xml files.\n"
61            "                Default to off (Input file is an XML file).\n"
62            "    -v=xxx      Validation scheme [always | never | auto*].\n"
63            "    -n          Enable namespace processing. Defaults to off.\n"
64            "    -s          Enable schema processing. Defaults to off.\n"
65            "    -f          Enable full schema constraint checking. Defaults to off.\n"
66            "    -locale=ll_CC specify the locale, default: en_US.\n"
67            "    -p          Print out names of elements and attributes encountered.\n"
68                    "    -?          Show this help.\n\n"
69            "  * = Default if not provided explicitly.\n"
70         << XERCES_STD_QUALIFIER endl;
71}
72
73
74
75// ---------------------------------------------------------------------------
76//
77//  Recursively Count up the total number of child Elements under the specified Node.
78//  Process attributes of the node, if any.
79//
80// ---------------------------------------------------------------------------
81static int countChildElements(DOMNode *n, bool printOutEncounteredEles)
82{
83    DOMNode *child;
84    int count = 0;
85    if (n) {
86        if (n->getNodeType() == DOMNode::ELEMENT_NODE)
87                {
88            if(printOutEncounteredEles) {
89                char *name = XMLString::transcode(n->getNodeName());
90                XERCES_STD_QUALIFIER cout <<"----------------------------------------------------------"<<XERCES_STD_QUALIFIER endl;
91                XERCES_STD_QUALIFIER cout <<"Encountered Element : "<< name << XERCES_STD_QUALIFIER endl;
92               
93                XMLString::release(&name);
94                       
95                if(n->hasAttributes()) {
96                    // get all the attributes of the node
97                    DOMNamedNodeMap *pAttributes = n->getAttributes();
98                    int nSize = pAttributes->getLength();
99                    XERCES_STD_QUALIFIER cout <<"\tAttributes" << XERCES_STD_QUALIFIER endl;
100                    XERCES_STD_QUALIFIER cout <<"\t----------" << XERCES_STD_QUALIFIER endl;
101                    for(int i=0;i<nSize;++i) {
102                        DOMAttr *pAttributeNode = (DOMAttr*) pAttributes->item(i);
103                        // get attribute name
104                        char *name = XMLString::transcode(pAttributeNode->getName());
105                       
106                        XERCES_STD_QUALIFIER cout << "\t" << name << "=";
107                        XMLString::release(&name);
108                       
109                        // get attribute type
110                        name = XMLString::transcode(pAttributeNode->getValue());
111                        XERCES_STD_QUALIFIER cout << name << XERCES_STD_QUALIFIER endl;
112                        XMLString::release(&name);
113                    }
114                }
115            }
116                        ++count;
117                }
118        for (child = n->getFirstChild(); child != 0; child=child->getNextSibling())
119            count += countChildElements(child, printOutEncounteredEles);
120    }
121    return count;
122}
123
124// ---------------------------------------------------------------------------
125//
126//   main
127//
128// ---------------------------------------------------------------------------
129int main(int argC, char* argV[])
130{
131
132    // Check command line and extract arguments.
133    if (argC < 2)
134    {
135        usage();
136        return 1;
137    }
138
139    const char*                xmlFile = 0;
140    AbstractDOMParser::ValSchemes valScheme = AbstractDOMParser::Val_Auto;
141    bool                       doNamespaces       = false;
142    bool                       doSchema           = false;
143    bool                       schemaFullChecking = false;
144    bool                       doList = false;
145    bool                       errorOccurred = false;
146    bool                       recognizeNEL = false;
147    bool                       printOutEncounteredEles = false;
148    char                       localeStr[64];
149    memset(localeStr, 0, sizeof localeStr);
150
151    int argInd;
152    for (argInd = 1; argInd < argC; argInd++)
153    {
154        // Break out on first parm not starting with a dash
155        if (argV[argInd][0] != '-')
156            break;
157
158        // Watch for special case help request
159        if (!strcmp(argV[argInd], "-?"))
160        {
161            usage();
162            return 2;
163        }
164         else if (!strncmp(argV[argInd], "-v=", 3)
165              ||  !strncmp(argV[argInd], "-V=", 3))
166        {
167            const char* const parm = &argV[argInd][3];
168
169            if (!strcmp(parm, "never"))
170                valScheme = AbstractDOMParser::Val_Never;
171            else if (!strcmp(parm, "auto"))
172                valScheme = AbstractDOMParser::Val_Auto;
173            else if (!strcmp(parm, "always"))
174                valScheme = AbstractDOMParser::Val_Always;
175            else
176            {
177                XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
178                return 2;
179            }
180        }
181         else if (!strcmp(argV[argInd], "-n")
182              ||  !strcmp(argV[argInd], "-N"))
183        {
184            doNamespaces = true;
185        }
186         else if (!strcmp(argV[argInd], "-s")
187              ||  !strcmp(argV[argInd], "-S"))
188        {
189            doSchema = true;
190        }
191         else if (!strcmp(argV[argInd], "-f")
192              ||  !strcmp(argV[argInd], "-F"))
193        {
194            schemaFullChecking = true;
195        }
196         else if (!strcmp(argV[argInd], "-l")
197              ||  !strcmp(argV[argInd], "-L"))
198        {
199            doList = true;
200        }
201         else if (!strcmp(argV[argInd], "-special:nel"))
202        {
203            // turning this on will lead to non-standard compliance behaviour
204            // it will recognize the unicode character 0x85 as new line character
205            // instead of regular character as specified in XML 1.0
206            // do not turn this on unless really necessary
207
208             recognizeNEL = true;
209        }
210         else if (!strcmp(argV[argInd], "-p")
211              ||  !strcmp(argV[argInd], "-P"))
212        {
213            printOutEncounteredEles = true;
214        }
215         else if (!strncmp(argV[argInd], "-locale=", 8))
216        {
217             // Get out the end of line
218             strcpy(localeStr, &(argV[argInd][8]));
219        }                       
220         else
221        {
222            XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd]
223                 << "', ignoring it\n" << XERCES_STD_QUALIFIER endl;
224        }
225    }
226
227    //
228    //  There should be only one and only one parameter left, and that
229    //  should be the file name.
230    //
231    if (argInd != argC - 1)
232    {
233        usage();
234        return 1;
235    }
236
237    // Initialize the XML4C system
238    try
239    {
240        if (strlen(localeStr))
241        {
242            XMLPlatformUtils::Initialize(localeStr);
243        }
244        else
245        {
246            XMLPlatformUtils::Initialize();
247        }
248
249        if (recognizeNEL)
250        {
251            XMLPlatformUtils::recognizeNEL(recognizeNEL);
252        }
253    }
254
255    catch (const XMLException& toCatch)
256    {
257         XERCES_STD_QUALIFIER cerr << "Error during initialization! :\n"
258              << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
259         return 1;
260    }
261
262    // Instantiate the DOM parser.
263    static const XMLCh gLS[] = { chLatin_L, chLatin_S, chNull };
264    DOMImplementation *impl = DOMImplementationRegistry::getDOMImplementation(gLS);
265    DOMBuilder        *parser = ((DOMImplementationLS*)impl)->createDOMBuilder(DOMImplementationLS::MODE_SYNCHRONOUS, 0);
266
267    parser->setFeature(XMLUni::fgDOMNamespaces, doNamespaces);
268    parser->setFeature(XMLUni::fgXercesSchema, doSchema);
269    parser->setFeature(XMLUni::fgXercesSchemaFullChecking, schemaFullChecking);
270
271    if (valScheme == AbstractDOMParser::Val_Auto)
272    {
273        parser->setFeature(XMLUni::fgDOMValidateIfSchema, true);
274    }
275    else if (valScheme == AbstractDOMParser::Val_Never)
276    {
277        parser->setFeature(XMLUni::fgDOMValidation, false);
278    }
279    else if (valScheme == AbstractDOMParser::Val_Always)
280    {
281        parser->setFeature(XMLUni::fgDOMValidation, true);
282    }
283
284    // enable datatype normalization - default is off
285    parser->setFeature(XMLUni::fgDOMDatatypeNormalization, true);
286
287    // And create our error handler and install it
288    DOMCountErrorHandler errorHandler;
289    parser->setErrorHandler(&errorHandler);
290
291    //
292    //  Get the starting time and kick off the parse of the indicated
293    //  file. Catch any exceptions that might propogate out of it.
294    //
295    unsigned long duration;
296
297    bool more = true;
298    XERCES_STD_QUALIFIER ifstream fin;
299
300    // the input is a list file
301    if (doList)
302        fin.open(argV[argInd]);
303
304    if (fin.fail()) {
305        XERCES_STD_QUALIFIER cerr <<"Cannot open the list file: " << argV[argInd] << XERCES_STD_QUALIFIER endl;
306        return 2;
307    }
308
309    while (more)
310    {
311        char fURI[1000];
312        //initialize the array to zeros
313        memset(fURI,0,sizeof(fURI));
314
315        if (doList) {
316            if (! fin.eof() ) {
317                fin.getline (fURI, sizeof(fURI));
318                if (!*fURI)
319                    continue;
320                else {
321                    xmlFile = fURI;
322                    XERCES_STD_QUALIFIER cerr << "==Parsing== " << xmlFile << XERCES_STD_QUALIFIER endl;
323                }
324            }
325            else
326                break;
327        }
328        else {
329            xmlFile = argV[argInd];
330            more = false;
331        }
332
333        //reset error count first
334        errorHandler.resetErrors();
335
336        XERCES_CPP_NAMESPACE_QUALIFIER DOMDocument *doc = 0;
337
338        try
339        {
340            // reset document pool
341            parser->resetDocumentPool();
342
343            const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
344            doc = parser->parseURI(xmlFile);
345            const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
346            duration = endMillis - startMillis;
347        }
348
349        catch (const XMLException& toCatch)
350        {
351            XERCES_STD_QUALIFIER cerr << "\nError during parsing: '" << xmlFile << "'\n"
352                 << "Exception message is:  \n"
353                 << StrX(toCatch.getMessage()) << "\n" << XERCES_STD_QUALIFIER endl;
354            errorOccurred = true;
355            continue;
356        }
357        catch (const DOMException& toCatch)
358        {
359            const unsigned int maxChars = 2047;
360            XMLCh errText[maxChars + 1];
361
362            XERCES_STD_QUALIFIER cerr << "\nDOM Error during parsing: '" << xmlFile << "'\n"
363                 << "DOMException code is:  " << toCatch.code << XERCES_STD_QUALIFIER endl;
364
365            if (DOMImplementation::loadDOMExceptionMsg(toCatch.code, errText, maxChars))
366                 XERCES_STD_QUALIFIER cerr << "Message is: " << StrX(errText) << XERCES_STD_QUALIFIER endl;
367
368            errorOccurred = true;
369            continue;
370        }
371        catch (...)
372        {
373            XERCES_STD_QUALIFIER cerr << "\nUnexpected exception during parsing: '" << xmlFile << "'\n";
374            errorOccurred = true;
375            continue;
376        }
377
378        //
379        //  Extract the DOM tree, get the list of all the elements and report the
380        //  length as the count of elements.
381        //
382        if (errorHandler.getSawErrors())
383        {
384            XERCES_STD_QUALIFIER cout << "\nErrors occurred, no output available\n" << XERCES_STD_QUALIFIER endl;
385            errorOccurred = true;
386        }
387         else
388        {
389            unsigned int elementCount = 0;
390            if (doc) {
391                elementCount = countChildElements((DOMNode*)doc->getDocumentElement(), printOutEncounteredEles);
392                // test getElementsByTagName and getLength
393                XMLCh xa[] = {chAsterisk, chNull};
394                if (elementCount != doc->getElementsByTagName(xa)->getLength()) {
395                    XERCES_STD_QUALIFIER cout << "\nErrors occurred, element count is wrong\n" << XERCES_STD_QUALIFIER endl;
396                    errorOccurred = true;
397                }
398            }
399
400            // Print out the stats that we collected and time taken.
401            XERCES_STD_QUALIFIER cout << xmlFile << ": " << duration << " ms ("
402                 << elementCount << " elems)." << XERCES_STD_QUALIFIER endl;
403        }
404    }
405
406    //
407    //  Delete the parser itself.  Must be done prior to calling Terminate, below.
408    //
409    parser->release();
410
411    // And call the termination method
412    XMLPlatformUtils::Terminate();
413
414    if (doList)
415        fin.close();
416
417    if (errorOccurred)
418        return 4;
419    else
420        return 0;
421}
422
423
424
425
426
427DOMCountErrorHandler::DOMCountErrorHandler() :
428
429    fSawErrors(false)
430{
431}
432
433DOMCountErrorHandler::~DOMCountErrorHandler()
434{
435}
436
437
438// ---------------------------------------------------------------------------
439//  DOMCountHandlers: Overrides of the DOM ErrorHandler interface
440// ---------------------------------------------------------------------------
441bool DOMCountErrorHandler::handleError(const DOMError& domError)
442{
443    fSawErrors = true;
444    if (domError.getSeverity() == DOMError::DOM_SEVERITY_WARNING)
445        XERCES_STD_QUALIFIER cerr << "\nWarning at file ";
446    else if (domError.getSeverity() == DOMError::DOM_SEVERITY_ERROR)
447        XERCES_STD_QUALIFIER cerr << "\nError at file ";
448    else
449        XERCES_STD_QUALIFIER cerr << "\nFatal Error at file ";
450
451    XERCES_STD_QUALIFIER cerr << StrX(domError.getLocation()->getURI())
452         << ", line " << domError.getLocation()->getLineNumber()
453         << ", char " << domError.getLocation()->getColumnNumber()
454         << "\n  Message: " << StrX(domError.getMessage()) << XERCES_STD_QUALIFIER endl;
455
456    return true;
457}
458
459void DOMCountErrorHandler::resetErrors()
460{
461    fSawErrors = false;
462}
Note: See TracBrowser for help on using the repository browser.