source: NonGTP/Xerces/xerces/samples/SAX2Count/SAX2Count.cpp @ 358

Revision 358, 14.5 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2001,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18* $Log: SAX2Count.cpp,v $
19* Revision 1.29  2004/09/08 13:55:33  peiyongz
20* Apache License Version 2.0
21*
22* Revision 1.28  2004/09/02 14:59:29  cargilld
23* Add OutOfMemoryException block to samples.
24*
25* Revision 1.27  2004/04/13 19:40:47  peiyongz
26* usage
27*
28* Revision 1.26  2004/04/13 16:47:02  peiyongz
29* command line option to turn on/off Identity Constraint checking
30*
31* Revision 1.25  2003/05/30 09:36:36  gareth
32* Use new macros for iostream.h and std:: issues.
33*
34* Revision 1.24  2002/12/10 13:34:51  tng
35* Samples minor update in usage information.
36*
37* Revision 1.23  2002/11/08 16:18:50  peiyongz
38* no message
39*
40* Revision 1.22  2002/11/07 18:30:42  peiyongz
41* command line option for "locale"
42*
43* Revision 1.21  2002/11/04 14:09:06  tng
44* [Bug 14201] use of ios::nocreate breaks build.
45*
46* Revision 1.20  2002/11/01 22:05:57  tng
47* Samples/Test update: Issue error if the list file failed to open.
48*
49* Revision 1.19  2002/09/27 19:24:57  tng
50* Samples Fix: wrong length in memset
51*
52* Revision 1.18  2002/07/17 18:58:35  tng
53* samples update: for testing special encoding purpose.
54*
55* Revision 1.17  2002/06/17 15:33:05  tng
56* Name Xerces features as XMLUni::fgXercesXXXX instead of XMLUni::fgSAX2XercesXXXX so that they can be shared with DOM parser.
57*
58* Revision 1.16  2002/02/13 16:11:06  knoaman
59* Update samples to use SAX2 features/properties constants from XMLUni.
60*
61* Revision 1.15  2002/02/06 16:36:51  knoaman
62* Added a new flag '-p' to SAX2 samples to set the 'namespace-prefixes' feature.
63*
64* Revision 1.14  2002/02/01 22:38:52  peiyongz
65* sane_include
66*
67* Revision 1.13  2001/10/29 17:02:57  tng
68* Fix typo in samples.
69*
70* Revision 1.12  2001/10/25 15:18:33  tng
71* delete the parser before XMLPlatformUtils::Terminate.
72*
73* Revision 1.11  2001/10/19 19:02:43  tng
74* [Bug 3909] return non-zero an exit code when error was encounted.
75* And other modification for consistent help display and return code across samples.
76*
77* Revision 1.10  2001/08/15 12:41:04  tng
78* Initialize the fURI array to zeros, in case, some compilers like AIX xlC_r doesn't reset the memory.
79*
80* Revision 1.9  2001/08/08 12:12:32  tng
81* Print the file name only if doList is on.
82*
83* Revision 1.8  2001/08/03 15:08:17  tng
84* close the list file.
85*
86* Revision 1.7  2001/08/02 17:10:29  tng
87* Allow DOMCount/SAXCount/IDOMCount/SAX2Count to take a file that has a list of xml file as input.
88*
89* Revision 1.6  2001/08/01 19:11:01  tng
90* Add full schema constraint checking flag to the samples and the parser.
91*
92* Revision 1.5  2001/05/11 13:24:56  tng
93* Copyright update.
94*
95* Revision 1.4  2001/05/03 15:59:55  tng
96* Schema: samples update with schema
97*
98* Revision 1.3  2000/08/09 22:46:06  jpolast
99* replace occurences of SAXCount with SAX2Count
100*
101* Revision 1.2  2000/08/09 22:40:15  jpolast
102* updates for changes to sax2 core functionality.
103*
104* Revision 1.1  2000/08/08 17:17:20  jpolast
105* initial checkin of SAX2Count
106*
107*
108*/
109
110
111// ---------------------------------------------------------------------------
112//  Includes
113// ---------------------------------------------------------------------------
114#include "SAX2Count.hpp"
115#include <xercesc/util/PlatformUtils.hpp>
116#include <xercesc/sax2/SAX2XMLReader.hpp>
117#include <xercesc/sax2/XMLReaderFactory.hpp>
118#if defined(XERCES_NEW_IOSTREAMS)
119#include <fstream>
120#else
121#include <fstream.h>
122#endif
123#include <xercesc/util/OutOfMemoryException.hpp>
124
125// ---------------------------------------------------------------------------
126//  Local helper methods
127// ---------------------------------------------------------------------------
128void usage()
129{
130    XERCES_STD_QUALIFIER cout << "\nUsage:\n"
131            "    SAX2Count [options] <XML file | List file>\n\n"
132            "This program invokes the SAX2XMLReader, and then prints the\n"
133            "number of elements, attributes, spaces and characters found\n"
134            "in each XML file, using SAX2 API.\n\n"
135            "Options:\n"
136            "    -l          Indicate the input file is a List File that has a list of xml files.\n"
137            "                Default to off (Input file is an XML file).\n"
138            "    -v=xxx      Validation scheme [always | never | auto*].\n"
139            "    -f          Enable full schema constraint checking processing. Defaults to off.\n"
140            "    -p          Enable namespace-prefixes feature. Defaults to off.\n"
141            "    -n          Disable namespace processing. Defaults to on.\n"
142            "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES.\n"
143            "    -s          Disable schema processing. Defaults to on.\n"
144            "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES.\n"
145            "    -i          Disable identity constraint checking. Defaults to on.\n"
146            "                NOTE: THIS IS OPPOSITE FROM OTHER SAMPLES.\n"
147            "    -locale=ll_CC specify the locale, default: en_US.\n"
148            "    -?          Show this help.\n\n"
149            "  * = Default if not provided explicitly.\n"
150         << XERCES_STD_QUALIFIER endl;
151}
152
153
154// ---------------------------------------------------------------------------
155//  Program entry point
156// ---------------------------------------------------------------------------
157int main(int argC, char* argV[])
158{
159
160    // Check command line and extract arguments.
161    if (argC < 2)
162    {
163        usage();
164        return 1;
165    }
166
167    const char*                  xmlFile      = 0;
168    SAX2XMLReader::ValSchemes    valScheme    = SAX2XMLReader::Val_Auto;
169    bool                         doNamespaces = true;
170    bool                         doSchema = true;
171    bool                         schemaFullChecking = false;
172    bool                         identityConstraintChecking = true;
173    bool                         doList = false;
174    bool                         errorOccurred = false;
175    bool                         namespacePrefixes = false;
176    bool                         recognizeNEL = false;
177    char                         localeStr[64];
178    memset(localeStr, 0, sizeof localeStr);
179
180    int argInd;
181    for (argInd = 1; argInd < argC; argInd++)
182    {
183        // Break out on first parm not starting with a dash
184        if (argV[argInd][0] != '-')
185            break;
186
187        // Watch for special case help request
188        if (!strcmp(argV[argInd], "-?"))
189        {
190            usage();
191            return 2;
192        }
193         else if (!strncmp(argV[argInd], "-v=", 3)
194              ||  !strncmp(argV[argInd], "-V=", 3))
195        {
196            const char* const parm = &argV[argInd][3];
197
198            if (!strcmp(parm, "never"))
199                valScheme = SAX2XMLReader::Val_Never;
200            else if (!strcmp(parm, "auto"))
201                valScheme = SAX2XMLReader::Val_Auto;
202            else if (!strcmp(parm, "always"))
203                valScheme = SAX2XMLReader::Val_Always;
204            else
205            {
206                XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
207                return 2;
208            }
209        }
210         else if (!strcmp(argV[argInd], "-n")
211              ||  !strcmp(argV[argInd], "-N"))
212        {
213            doNamespaces = false;
214        }
215         else if (!strcmp(argV[argInd], "-s")
216              ||  !strcmp(argV[argInd], "-S"))
217        {
218            doSchema = false;
219        }
220         else if (!strcmp(argV[argInd], "-f")
221              ||  !strcmp(argV[argInd], "-F"))
222        {
223            schemaFullChecking = true;
224        }
225         else if (!strcmp(argV[argInd], "-i")
226              ||  !strcmp(argV[argInd], "-I"))
227        {
228            identityConstraintChecking = false;
229        }
230         else if (!strcmp(argV[argInd], "-l")
231              ||  !strcmp(argV[argInd], "-L"))
232        {
233            doList = true;
234        }
235         else if (!strcmp(argV[argInd], "-p")
236              ||  !strcmp(argV[argInd], "-P"))
237        {
238            namespacePrefixes = true;
239        }
240         else if (!strcmp(argV[argInd], "-special:nel"))
241        {
242            // turning this on will lead to non-standard compliance behaviour
243            // it will recognize the unicode character 0x85 as new line character
244            // instead of regular character as specified in XML 1.0
245            // do not turn this on unless really necessary
246             recognizeNEL = true;
247        }
248         else if (!strncmp(argV[argInd], "-locale=", 8))
249        {
250             // Get out the end of line
251             strcpy(localeStr, &(argV[argInd][8]));
252        }                       
253        else
254        {
255            XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd]
256                << "', ignoring it\n" << XERCES_STD_QUALIFIER endl;
257        }
258    }
259
260    //
261    //  There should be only one and only one parameter left, and that
262    //  should be the file name.
263    //
264    if (argInd != argC - 1)
265    {
266        usage();
267        return 1;
268    }
269
270    // Initialize the XML4C2 system
271    try
272    {
273        if (strlen(localeStr))
274        {
275            XMLPlatformUtils::Initialize(localeStr);
276        }
277        else
278        {
279            XMLPlatformUtils::Initialize();
280        }
281
282        if (recognizeNEL)
283        {
284            XMLPlatformUtils::recognizeNEL(recognizeNEL);
285        }
286    }
287
288    catch (const XMLException& toCatch)
289    {
290        XERCES_STD_QUALIFIER cerr << "Error during initialization! Message:\n"
291            << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
292        return 1;
293    }
294
295    //
296    //  Create a SAX parser object. Then, according to what we were told on
297    //  the command line, set it to validate or not.
298    //
299    SAX2XMLReader* parser = XMLReaderFactory::createXMLReader();
300    parser->setFeature(XMLUni::fgSAX2CoreNameSpaces, doNamespaces);
301    parser->setFeature(XMLUni::fgXercesSchema, doSchema);
302    parser->setFeature(XMLUni::fgXercesSchemaFullChecking, schemaFullChecking);
303    parser->setFeature(XMLUni::fgXercesIdentityConstraintChecking, identityConstraintChecking);
304    parser->setFeature(XMLUni::fgSAX2CoreNameSpacePrefixes, namespacePrefixes);
305
306    if (valScheme == SAX2XMLReader::Val_Auto)
307    {
308        parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
309        parser->setFeature(XMLUni::fgXercesDynamic, true);
310    }
311    if (valScheme == SAX2XMLReader::Val_Never)
312    {
313        parser->setFeature(XMLUni::fgSAX2CoreValidation, false);
314    }
315    if (valScheme == SAX2XMLReader::Val_Always)
316    {
317        parser->setFeature(XMLUni::fgSAX2CoreValidation, true);
318        parser->setFeature(XMLUni::fgXercesDynamic, false);
319    }
320
321    //
322    //  Create our SAX handler object and install it on the parser, as the
323    //  document and error handler.
324    //
325    SAX2CountHandlers handler;
326    parser->setContentHandler(&handler);
327    parser->setErrorHandler(&handler);
328
329    //
330    //  Get the starting time and kick off the parse of the indicated
331    //  file. Catch any exceptions that might propogate out of it.
332    //
333    unsigned long duration;
334
335    bool more = true;
336    XERCES_STD_QUALIFIER ifstream fin;
337
338    // the input is a list file
339    if (doList)
340        fin.open(argV[argInd]);
341
342    if (fin.fail()) {
343        XERCES_STD_QUALIFIER cerr <<"Cannot open the list file: " << argV[argInd] << XERCES_STD_QUALIFIER endl;
344        return 2;
345    }
346
347    while (more)
348    {
349        char fURI[1000];
350        //initialize the array to zeros
351        memset(fURI,0,sizeof(fURI));
352
353        if (doList) {
354            if (! fin.eof() ) {
355                fin.getline (fURI, sizeof(fURI));
356                if (!*fURI)
357                    continue;
358                else {
359                    xmlFile = fURI;
360                    XERCES_STD_QUALIFIER cerr << "==Parsing== " << xmlFile << XERCES_STD_QUALIFIER endl;
361                }
362            }
363            else
364                break;
365        }
366        else {
367            xmlFile = argV[argInd];
368            more = false;
369        }
370
371        //reset error count first
372        handler.resetErrors();
373
374        try
375        {
376            const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
377            parser->parse(xmlFile);
378            const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
379            duration = endMillis - startMillis;
380        }
381        catch (const OutOfMemoryException&)
382        {
383            XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
384            errorOccurred = true;
385            continue;
386        }
387        catch (const XMLException& e)
388        {
389            XERCES_STD_QUALIFIER cerr << "\nError during parsing: '" << xmlFile << "'\n"
390                << "Exception message is:  \n"
391                << StrX(e.getMessage()) << "\n" << XERCES_STD_QUALIFIER endl;
392            errorOccurred = true;
393            continue;
394        }
395
396        catch (...)
397        {
398            XERCES_STD_QUALIFIER cerr << "\nUnexpected exception during parsing: '" << xmlFile << "'\n";
399            errorOccurred = true;
400            continue;
401        }
402
403
404        // Print out the stats that we collected and time taken
405        if (!handler.getSawErrors())
406        {
407            XERCES_STD_QUALIFIER cout << xmlFile << ": " << duration << " ms ("
408                << handler.getElementCount() << " elems, "
409                << handler.getAttrCount() << " attrs, "
410                << handler.getSpaceCount() << " spaces, "
411                << handler.getCharacterCount() << " chars)" << XERCES_STD_QUALIFIER endl;
412        }
413        else
414            errorOccurred = true;
415    }
416
417    if (doList)
418        fin.close();
419
420    //
421    //  Delete the parser itself.  Must be done prior to calling Terminate, below.
422    //
423    delete parser;
424
425    // And call the termination method
426    XMLPlatformUtils::Terminate();
427
428    if (errorOccurred)
429        return 4;
430    else
431        return 0;
432
433}
434
Note: See TracBrowser for help on using the repository browser.