source: NonGTP/Xerces/xerces/samples/MemParse/MemParse.cpp @ 358

Revision 358, 11.7 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2001,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17
18/*
19 * $Log: MemParse.cpp,v $
20 * Revision 1.18  2004/09/08 13:55:32  peiyongz
21 * Apache License Version 2.0
22 *
23 * Revision 1.17  2004/09/02 14:59:28  cargilld
24 * Add OutOfMemoryException block to samples.
25 *
26 * Revision 1.16  2003/09/12 18:14:19  neilg
27 * enable MemParse to work on OS400; thanks to Jay Hansen.
28 *
29 * Revision 1.15  2003/08/07 21:21:38  neilg
30 * fix segmentation faults that may arise when the parser throws exceptions during document parsing.  In general, XMLPlatformUtils::Terminate() should not be called from within a catch statement.
31 *
32 * Revision 1.14  2003/05/30 09:36:35  gareth
33 * Use new macros for iostream.h and std:: issues.
34 *
35 * Revision 1.13  2002/02/01 22:37:14  peiyongz
36 * sane_include
37 *
38 * Revision 1.12  2001/10/25 15:18:33  tng
39 * delete the parser before XMLPlatformUtils::Terminate.
40 *
41 * Revision 1.11  2001/10/19 18:56:08  tng
42 * Pulled the hardcoded "encoding" out of the document itself and made it a #define
43 * to make it easier to support other encodings.  Patch from David McCreedy.
44 * And other modification for consistent help display and return code across samples.
45 *
46 * Revision 1.10  2001/08/01 19:11:01  tng
47 * Add full schema constraint checking flag to the samples and the parser.
48 *
49 * Revision 1.9  2001/05/11 13:24:55  tng
50 * Copyright update.
51 *
52 * Revision 1.8  2001/05/03 15:59:40  tng
53 * Schema: samples update with schema
54 *
55 * Revision 1.7  2000/09/11 18:43:48  aruna1
56 * OS390 related updates
57 *
58 * Revision 1.6  2000/03/02 19:53:42  roddey
59 * This checkin includes many changes done while waiting for the
60 * 1.1.0 code to be finished. I can't list them all here, but a list is
61 * available elsewhere.
62 *
63 * Revision 1.5  2000/02/11 02:37:01  abagchi
64 * Removed StrX::transcode
65 *
66 * Revision 1.4  2000/02/06 07:47:19  rahulj
67 * Year 2K copyright swat.
68 *
69 * Revision 1.3  2000/01/12 00:27:00  roddey
70 * Updates to work with the new URL and input source scheme.
71 *
72 * Revision 1.2  1999/11/20 01:09:55  rahulj
73 * Fixed usage message.
74 *
75 * Revision 1.1.1.1  1999/11/09 01:09:49  twl
76 * Initial checkin
77 *
78 * Revision 1.7  1999/11/08 20:43:36  rahul
79 * Swat for adding in Product name and CVS comment log variable.
80 *
81 */
82
83
84/**
85 * This sample program illustrates how one can use a memory buffer as the
86 * input to the parser. The memory buffer contains raw bytes representing XML
87 * statements.
88 *
89 * Look at the API documentation for 'MemBufInputSource' for more information
90 * on parameters to the constructor.
91 *
92 */
93
94
95// ---------------------------------------------------------------------------
96//  Includes
97// ---------------------------------------------------------------------------
98#include <xercesc/parsers/SAXParser.hpp>
99#include <xercesc/framework/MemBufInputSource.hpp>
100#include "MemParse.hpp"
101#include <xercesc/util/OutOfMemoryException.hpp>
102
103// ---------------------------------------------------------------------------
104//  Local const data
105//
106//  gXMLInMemBuf
107//      Defines the memory buffer contents which are parsed by the XML
108//      parser. This is the cheap way to do it, instead of reading it from
109//      somewhere. For this demo, it's fine.
110//
111//      NOTE: If your encoding is not ascii you will need to change
112//            the MEMPARSE_ENCODING #define
113//
114//  gMemBufId
115//      A simple name to give as the system id for the memory buffer. This
116//      is just for identification purposes in case of errors. It's not a real
117//      system id (and the parser knows that).
118// ---------------------------------------------------------------------------
119
// Encoding name spliced into the XML declaration of the in-memory document.
// A definition of MEMPARSE_ENCODING made before this point takes precedence;
// otherwise a default is selected from the OS390 / OS400 platform macros.
#if !defined(MEMPARSE_ENCODING)
   #ifdef OS390
      #define MEMPARSE_ENCODING "ibm-1047-s390"
   #elif defined(OS400)
      #define MEMPARSE_ENCODING "ibm037"
   #else
      #define MEMPARSE_ENCODING "ascii"
   #endif
#endif /* !defined(MEMPARSE_ENCODING) */

// The XML document handed to the parser. It is written as a sequence of
// adjacent string literals (one per line of the document) which the compiler
// joins into a single buffer; there is no newline after the closing tag.
static const char*  gXMLInMemBuf =
    "<?xml version='1.0' encoding='" MEMPARSE_ENCODING "'?>\n"
    "<!DOCTYPE company [\n"
    "<!ELEMENT company     (product,category,developedAt)>\n"
    "<!ELEMENT product     (#PCDATA)>\n"
    "<!ELEMENT category    (#PCDATA)>\n"
    "<!ATTLIST category idea CDATA #IMPLIED>\n"
    "<!ELEMENT developedAt (#PCDATA)>\n"
    "]>\n\n"
    "<company>\n"
    "    <product>XML4C</product>\n"
    "    <category idea='great'>XML Parsing Tools</category>\n"
    "    <developedAt>\n"
    "      IBM Center for Java Technology, Silicon Valley, Cupertino, CA\n"
    "    </developedAt>\n"
    "</company>";

// Name supplied as the system id of the memory buffer.
static const char*  gMemBufId = "prodInfo";
150
151
152
153// ---------------------------------------------------------------------------
154//  Local helper methods
155// ---------------------------------------------------------------------------
156void usage()
157{
158    XERCES_STD_QUALIFIER cout << "\nUsage:\n"
159            "    MemParse [options]\n\n"
160            "This program uses the SAX Parser to parse a memory buffer\n"
161            "containing XML statements, and reports the number of\n"
162            "elements and attributes found.\n\n"
163            "Options:\n"
164            "    -v=xxx      Validation scheme [always | never | auto*].\n"
165            "    -n          Enable namespace processing. Defaults to off.\n"
166            "    -s          Enable schema processing. Defaults to off.\n"
167            "    -f          Enable full schema constraint checking. Defaults to off.\n"
168                      "    -?          Show this help.\n\n"
169            "  * = Default if not provided explicitly.\n"
170         << XERCES_STD_QUALIFIER endl;
171}
172
173
174// ---------------------------------------------------------------------------
175//  Program entry point
176// ---------------------------------------------------------------------------
177int main(int argC, char* argV[])
178{
179    // Initialize the XML4C2 system
180    try
181    {
182         XMLPlatformUtils::Initialize();
183    }
184    catch (const XMLException& toCatch)
185    {
186         XERCES_STD_QUALIFIER cerr << "Error during initialization! Message:\n"
187              << StrX(toCatch.getMessage()) << XERCES_STD_QUALIFIER endl;
188         return 1;
189    }
190
191    SAXParser::ValSchemes    valScheme = SAXParser::Val_Auto;
192    bool doNamespaces       = false;
193    bool doSchema           = false;
194    bool schemaFullChecking = false;
195
196    int argInd;
197    for (argInd = 1; argInd < argC; argInd++)
198    {
199        // Break out on first parm not starting with a dash
200        if (argV[argInd][0] != '-')
201        {
202            usage();
203            XMLPlatformUtils::Terminate();
204            return 1;
205        }
206
207        // Watch for special case help request
208        if (!strcmp(argV[argInd], "-?"))
209        {
210            usage();
211            XMLPlatformUtils::Terminate();
212            return 1;
213        }
214         else if (!strncmp(argV[argInd], "-v=", 3)
215              ||  !strncmp(argV[argInd], "-V=", 3))
216        {
217            const char* const parm = &argV[argInd][3];
218
219            if (!strcmp(parm, "never"))
220                valScheme = SAXParser::Val_Never;
221            else if (!strcmp(parm, "auto"))
222                valScheme = SAXParser::Val_Auto;
223            else if (!strcmp(parm, "always"))
224                valScheme = SAXParser::Val_Always;
225            else
226            {
227                XERCES_STD_QUALIFIER cerr << "Unknown -v= value: " << parm << XERCES_STD_QUALIFIER endl;
228                return 2;
229            }
230        }
231         else if (!strcmp(argV[argInd], "-n")
232              ||  !strcmp(argV[argInd], "-N"))
233        {
234            doNamespaces = true;
235        }
236         else if (!strcmp(argV[argInd], "-s")
237              ||  !strcmp(argV[argInd], "-S"))
238        {
239            doSchema = true;
240        }
241         else if (!strcmp(argV[argInd], "-f")
242              ||  !strcmp(argV[argInd], "-F"))
243        {
244            schemaFullChecking = true;
245        }
246         else
247        {
248            XERCES_STD_QUALIFIER cerr << "Unknown option '" << argV[argInd]
249                 << "', ignoring it\n" << XERCES_STD_QUALIFIER endl;
250        }
251    }
252
253    //
254    //  Create a SAX parser object. Then, according to what we were told on
255    //  the command line, set it to validate or not.
256    //
257    SAXParser* parser = new SAXParser;
258    parser->setValidationScheme(valScheme);
259    parser->setDoNamespaces(doNamespaces);
260    parser->setDoSchema(doSchema);
261    parser->setValidationSchemaFullChecking(schemaFullChecking);
262
263    //
264    //  Create our SAX handler object and install it on the parser, as the
265    //  document and error handlers.
266    //
267    MemParseHandlers handler;
268    parser->setDocumentHandler(&handler);
269    parser->setErrorHandler(&handler);
270
271    //
272    //  Create MemBufferInputSource from the buffer containing the XML
273    //  statements.
274    //
275    //  NOTE: We are using strlen() here, since we know that the chars in
276    //  our hard coded buffer are single byte chars!!! The parameter wants
277    //  the number of BYTES, not chars, so when you create a memory buffer
278    //  give it the byte size (which just happens to be the same here.)
279    //
280    MemBufInputSource* memBufIS = new MemBufInputSource
281    (
282        (const XMLByte*)gXMLInMemBuf
283        , strlen(gXMLInMemBuf)
284        , gMemBufId
285        , false
286    );
287
288    //
289    //  Get the starting time and kick off the parse of the indicated
290    //  file. Catch any exceptions that might propogate out of it.
291    //
292    unsigned long duration;
293    int errorCount = 0;
294    int errorCode = 0;
295    try
296    {
297        const unsigned long startMillis = XMLPlatformUtils::getCurrentMillis();
298        parser->parse(*memBufIS);
299        const unsigned long endMillis = XMLPlatformUtils::getCurrentMillis();
300        duration = endMillis - startMillis;
301        errorCount = parser->getErrorCount();
302    }
303    catch (const OutOfMemoryException&)
304    {
305        XERCES_STD_QUALIFIER cerr << "OutOfMemoryException" << XERCES_STD_QUALIFIER endl;
306        errorCode = 5;
307    }
308    catch (const XMLException& e)
309    {
310        XERCES_STD_QUALIFIER cerr << "\nError during parsing memory stream:\n"
311             << "Exception message is:  \n"
312             << StrX(e.getMessage()) << "\n" << XERCES_STD_QUALIFIER endl;
313        errorCode = 4;
314    }
315    if(errorCode) {
316        XMLPlatformUtils::Terminate();
317        return errorCode;
318    }
319
320    // Print out the stats that we collected and time taken.
321    if (!errorCount) {
322        XERCES_STD_QUALIFIER cout << "\nFinished parsing the memory buffer containing the following "
323             << "XML statements:\n\n"
324             << gXMLInMemBuf
325             << "\n\n\n"
326             << "Parsing took " << duration << " ms ("
327             << handler.getElementCount() << " elements, "
328             << handler.getAttrCount() << " attributes, "
329             << handler.getSpaceCount() << " spaces, "
330             << handler.getCharacterCount() << " characters).\n" << XERCES_STD_QUALIFIER endl;
331    }
332
333    //
334    //  Delete the parser itself.  Must be done prior to calling Terminate, below.
335    //
336    delete parser;
337
338    delete memBufIS;
339
340    // And call the termination method
341    XMLPlatformUtils::Terminate();
342
343    if (errorCount > 0)
344        return 4;
345    else
346        return 0;
347}
348
Note: See TracBrowser for help on using the repository browser.