source: NonGTP/Xerces/xerces/samples/SAXCount/SAXCount.cpp @ 358

Revision 358, 13.4 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2001,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18* $Log: SAXCount.cpp,v $
19* Revision 1.30  2004/09/08 13:55:33  peiyongz
20* Apache License Version 2.0
21*
22* Revision 1.29  2004/09/02 14:59:29  cargilld
23* Add OutOfMemoryException block to samples.
24*
25* Revision 1.28  2003/05/30 09:36:36  gareth
26* Use new macros for iostream.h and std:: issues.
27*
28* Revision 1.27  2002/12/10 13:34:42  tng
29* Samples minor update in usage information.
30*
31* Revision 1.26  2002/11/08 16:19:05  peiyongz
32* no message
33*
34* Revision 1.25  2002/11/07 18:31:04  peiyongz
35* command line option for "locale"
36*
37* Revision 1.24  2002/11/04 14:09:16  tng
38* [Bug 14201] use of ios::nocreate breaks build.
39*
40* Revision 1.23  2002/11/01 22:05:44  tng
41* Samples/Test update: Issue error if the list file failed to open.
42*
43* Revision 1.22  2002/09/27 19:25:10  tng
44* Samples Fix: wrong length in memset
45*
46* Revision 1.21  2002/07/17 18:58:36  tng
47* samples update: for testing special encoding purpose.
48*
49* Revision 1.20  2001/11/13 13:22:35  tng
50* SAXCount fix: restore previous feature to accept multiple input files.
51*
52* Revision 1.19  2001/10/29 17:02:57  tng
53* Fix typo in samples.
54*
55* Revision 1.18  2001/10/25 15:18:33  tng
56* delete the parser before XMLPlatformUtils::Terminate.
57*
58* Revision 1.17  2001/10/19 19:02:43  tng
59* [Bug 3909] return non-zero an exit code when error was encounted.
60* And other modification for consistent help display and return code across samples.
61*
62* Revision 1.16  2001/08/15 12:41:04  tng
63* Initialize the fURI array to zeros, in case, some compilers like AIX xlC_r doesn't reset the memory.
64*
65* Revision 1.15  2001/08/08 12:12:32  tng
66* Print the file name only if doList is on.
67*
68* Revision 1.14  2001/08/03 15:08:17  tng
69* close the list file.
70*
71* Revision 1.13  2001/08/02 17:10:29  tng
72* Allow DOMCount/SAXCount/IDOMCount/SAX2Count to take a file that has a list of xml file as input.
73*
74* Revision 1.12  2001/08/01 19:11:01  tng
75* Add full schema constraint checking flag to the samples and the parser.
76*
77* Revision 1.11  2001/05/11 13:24:57  tng
78* Copyright update.
79*
80* Revision 1.10  2001/05/03 16:00:12  tng
81* Schema: samples update with schema
82*
83* Revision 1.9  2000/10/19 23:52:41  andyh
84* SAXCount: Allow multiple files on command line
85*
86* Revision 1.8  2000/06/16 20:25:38  rahulj
87* Add the -v=always option to force validation checking. Need this
88* option for running the conformance tests.
89*
90* Revision 1.7  2000/05/31 18:39:59  rahulj
91* 'Auto' validation is the default processing mode.
92*
93* Revision 1.6  2000/05/09 00:22:29  andyh
94* Memory Cleanup.  XMLPlatformUtils::Terminate() deletes all lazily
95* allocated memory; memory leak checking tools will no longer report
96* that leaks exist.  (DOM GetElementsByTagID temporarily removed
97* as part of this.)
98*
99* Revision 1.5  2000/03/03 01:29:31  roddey
100* Added a scanReset()/parseReset() method to the scanner and
101* parsers, to allow for reset after early exit from a progressive parse.
102* Added calls to new Terminate() call to all of the samples. Improved
103* documentation in SAX and DOM parsers.
104*
105* Revision 1.4  2000/03/02 19:53:47  roddey
106* This checkin includes many changes done while waiting for the
107* 1.1.0 code to be finished. I can't list them all here, but a list is
108* available elsewhere.
109*
110* Revision 1.3  2000/02/11 02:39:10  abagchi
111* Removed StrX::transcode
112*
113* Revision 1.2  2000/02/06 07:47:23  rahulj
114* Year 2K copyright swat.
115*
116* Revision 1.1.1.1  1999/11/09 01:09:30  twl
117* Initial checkin
118*
119* Revision 1.7  1999/11/08 20:43:40  rahul
120* Swat for adding in Product name and CVS comment log variable.
121*
122*/
123
124
125// ---------------------------------------------------------------------------
126//  Includes
127// ---------------------------------------------------------------------------
128#include "SAXCount.hpp"
129#if defined(XERCES_NEW_IOSTREAMS)
130#include <fstream>
131#else
132#include <fstream.h>
133#endif
134#include <xercesc/util/OutOfMemoryException.hpp>
135
136// ---------------------------------------------------------------------------
137//  Local helper methods
138// ---------------------------------------------------------------------------
139void usage()
140{
141    XERCES_STD_QUALIFIER cout << "\nUsage:\n"
142            "    SAXCount [options] <XML file | List file>\n\n"
143            "This program invokes the SAX Parser, and then prints the\n"
144            "number of elements, attributes, spaces and characters found\n"
145            "in each XML file, using SAX API.\n\n"
146            "Options:\n"
147            "    -l          Indicate the input file is a List File that has a list of xml files.\n"
148            "                Default to off (Input file is an XML file).\n"
149            "    -v=xxx      Validation scheme [always | never | auto*].\n"
150            "    -n          Enable namespace processing. Defaults to off.\n"
151            "    -s          Enable schema processing. Defaults to off.\n"
152            "    -f          Enable full schema constraint checking. Defaults to off.\n"
153            "    -locale=ll_CC specify the locale, default: en_US.\n"
154                    "    -?          Show this help.\n\n"
155            "  * = Default if not provided explicitly.\n"
156         << XERCES_STD_QUALIFIER endl;
157}
158
159
160// ---------------------------------------------------------------------------
161//  Program entry point
162// ---------------------------------------------------------------------------
163int main(int argC, char* argV[])
164{
165
166    // Check command line and extract arguments.
167    if (argC < 2)
168    {
169        usage();
170        return 1;
171    }
172
173    const char*              xmlFile = 0;
174    SAXParser::ValSchemes    valScheme = SAXParser::Val_Auto;
175    bool                     doNamespaces       = false;
176    bool                     doSchema           = false;
177    bool                     schemaFullChecking = false;
178    bool                     doList = false;
179    bool                     errorOccurred = false;
180    bool                     recognizeNEL = false;
181    char                     localeStr[64];
182    memset(localeStr, 0, sizeof localeStr);
183
184    int argInd;
185    for (argInd = 1; argInd < argC; argInd++)
186    {
187        // Break out on first parm not starting with a dash
188        if (argV[argInd][0] != '-')
189            break;
190
191        // Watch for special case help request
192        if (!strcmp(argV[argInd], "-?"))
193        {
194            usage();
195            return 2;
196        }
197         else if (!strncmp(argV[argInd], "-v=", 3)
198              ||  !strncmp(argV[argInd], "-V=", 3))
199        {
200            const char* const parm = &argV[argInd][3];
201
202            if (!strcmp(parm, "never"))
203                valScheme = SAXParser::Val_Never;
204            else if (!strcmp(parm, "auto"))
205                valScheme = SAXParser::Val_Auto;
206            else if (!strcmp(parm, "always"))
207                valScheme = SAXParser::Val_Always;
208            else
209            {
210                XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
211                return 2;
212            }
213        }
214         else if (!strcmp(argV[argInd], "-n")
215              ||  !strcmp(argV[argInd], "-N"))
216        {
217            doNamespaces = true;
218        }
219         else if (!strcmp(argV[argInd], "-s")
220              ||  !strcmp(argV[argInd], "-S"))
221        {
222            doSchema = true;
223        }
224         else if (!strcmp(argV[argInd], "-f")
225              ||  !strcmp(argV[argInd], "-F"))
226        {
227            schemaFullChecking = true;
228        }
229         else if (!strcmp(argV[argInd], "-l")
230              ||  !strcmp(argV[argInd], "-L"))
231        {
232            doList = true;
233        }
234         else if (!strcmp(argV[argInd], "-special:nel"))
235        {
236            // turning this on will lead to non-standard compliance behaviour
237            // it will recognize the unicode character 0x85 as new line character
238            // instead of regular character as specified in XML 1.0
239            // do not turn this on unless really necessary
240             recognizeNEL = true;
241        }
242         else if (!strncmp(argV[argInd], "-locale=", 8))
243        {
244             // Get out the end of line
245             strcpy(localeStr, &(argV[argInd][8]));
246        }                       
247        else
248        {
249            XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd]
250                << "', ignoring it\n" << XERCES_STD_QUALIFIER endl;
251        }
252    }
253
254    //
255    //  There should at least one parameter left, and that
256    //  should be the file name(s).
257    //
258    if (argInd == argC)
259    {
260        usage();
261        return 1;
262    }
263
264    // Initialize the XML4C2 system
265    try
266    {
267        if (strlen(localeStr))
268        {
269            XMLPlatformUtils::Initialize(localeStr);
270        }
271        else
272        {
273            XMLPlatformUtils::Initialize();
274        }
275
276        if (recognizeNEL)
277        {
278            XMLPlatformUtils::recognizeNEL(recognizeNEL);
279        }
280    }
281
282    catch (const XMLException& toCatch)
283    {
284        XERCES_STD_QUALIFIER cerr << "Error during initialization! Message:\n"
285            << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
286        return 1;
287    }
288
289    //
290    //  Create a SAX parser object. Then, according to what we were told on
291    //  the command line, set it to validate or not.
292    //
293    SAXParser* parser = new SAXParser;
294
295    parser->setValidationScheme(valScheme);
296    parser->setDoNamespaces(doNamespaces);
297    parser->setDoSchema(doSchema);
298    parser->setValidationSchemaFullChecking(schemaFullChecking);
299
300    //
301    //  Create our SAX handler object and install it on the parser, as the
302    //  document and error handler.
303    //
304    SAXCountHandlers handler;
305    parser->setDocumentHandler(&handler);
306    parser->setErrorHandler(&handler);
307
308
309    //
310    //  Get the starting time and kick off the parse of the indicated
311    //  file. Catch any exceptions that might propogate out of it.
312    //
313    unsigned long duration;
314
315    XERCES_STD_QUALIFIER ifstream fin;
316
317    // the input is a list file
318    if (doList)
319        fin.open(argV[argInd]);
320
321    if (fin.fail()) {
322        XERCES_STD_QUALIFIER cerr <<"Cannot open the list file: " << argV[argInd] << XERCES_STD_QUALIFIER endl;
323        return 2;
324    }
325
326    while (true)
327    {
328        char fURI[1000];
329        //initialize the array to zeros
330        memset(fURI,0,sizeof(fURI));
331
332        if (doList) {
333            if (! fin.eof() ) {
334                fin.getline (fURI, sizeof(fURI));
335                if (!*fURI)
336                    continue;
337                else {
338                    xmlFile = fURI;
339                    XERCES_STD_QUALIFIER cerr << "==Parsing== " << xmlFile << XERCES_STD_QUALIFIER endl;
340                }
341            }
342            else
343                break;
344        }
345        else {
346            if (argInd < argC)
347            {
348                 xmlFile = argV[argInd];
349                 argInd++;
350            }
351            else
352                break;
353        }
354
355        //reset error count first
356        handler.resetErrors();
357
358        try
359        {
360            const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
361            parser->parse(xmlFile);
362            const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
363            duration = endMillis - startMillis;
364        }
365        catch (const OutOfMemoryException&)
366        {
367            XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
368            errorOccurred = true;
369            continue;
370        }
371        catch (const XMLException& e)
372        {
373            XERCES_STD_QUALIFIER cerr << "\nError during parsing: '" << xmlFile << "'\n"
374                << "Exception message is:  \n"
375                << StrX(e.getMessage()) << "\n" << XERCES_STD_QUALIFIER endl;
376            errorOccurred = true;
377            continue;
378        }
379
380        catch (...)
381        {
382            XERCES_STD_QUALIFIER cerr << "\nUnexpected exception during parsing: '" << xmlFile << "'\n";
383            errorOccurred = true;
384            continue;
385        }
386
387
388        // Print out the stats that we collected and time taken
389        if (!handler.getSawErrors())
390        {
391            XERCES_STD_QUALIFIER cout << xmlFile << ": " << duration << " ms ("
392                << handler.getElementCount() << " elems, "
393                << handler.getAttrCount() << " attrs, "
394                << handler.getSpaceCount() << " spaces, "
395                << handler.getCharacterCount() << " chars)" << XERCES_STD_QUALIFIER endl;
396        }
397        else
398            errorOccurred = true;
399    }
400
401    if (doList)
402        fin.close();
403
404    //
405    //  Delete the parser itself.  Must be done prior to calling Terminate, below.
406    //
407    delete parser;
408
409    // And call the termination method
410    XMLPlatformUtils::Terminate();
411
412    if (errorOccurred)
413        return 4;
414    else
415        return 0;
416
417}
418
Note: See TracBrowser for help on using the repository browser.