source: NonGTP/Xerces/xerces/include/xercesc/parsers/XercesDOMParser.hpp @ 358

Revision 358, 25.8 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 2001-2002,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Id: XercesDOMParser.hpp,v 1.20 2004/09/08 13:56:18 peiyongz Exp $
19 *
20 */
21
22#if !defined(XercesDOMParser_HPP)
23#define XercesDOMParser_HPP
24
25
26#include <xercesc/parsers/AbstractDOMParser.hpp>
27
28XERCES_CPP_NAMESPACE_BEGIN
29
30
31class EntityResolver;
32class ErrorHandler;
33class Grammar;
34class XMLEntityResolver;
35class XMLResourceIdentifier;
36
37 /**
38  * This class implements the Document Object Model (DOM) interface.
39  * It should be used by applications which choose to parse and
40  * process the XML document using the DOM APIs. This implementation
41  * also allows the applications to install an error and an entity
42  * handler (useful extensions to the DOM specification).
43  *
44  * <p>It can be used to instantiate a validating or non-validating
45  * parser, by setting a member flag.</p>
46  */
47class PARSERS_EXPORT XercesDOMParser : public AbstractDOMParser
48{
49public :
50    // -----------------------------------------------------------------------
51    //  Constructors and Destructor
52    // -----------------------------------------------------------------------
53
54    /** @name Constructors and Destructor */
55    //@{
56    /** Construct a XercesDOMParser, with an optional validator
57      *
58      * Constructor with an instance of validator class to use for
59      * validation. If you don't provide a validator, a default one will
60      * be created for you in the scanner.
61      *
62      * @param valToAdopt Pointer to the validator instance to use. The
63      *                   parser is responsible for freeing the memory.
64      *
65      * @param manager    Pointer to the memory manager to be used to
66      *                   allocate objects.
67      * @param gramPool   Pointer to the grammar pool instance from
68      *                   external application.
69      *                   The parser does NOT own it.
70      */
71    XercesDOMParser
72    (
73          XMLValidator* const   valToAdopt = 0
74        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
75        , XMLGrammarPool* const gramPool = 0       
76    );
77
78    /**
79      * Destructor
80      */
81    virtual ~XercesDOMParser();
82
83    //@}
84
85
86    // -----------------------------------------------------------------------
87    //  Getter methods
88    // -----------------------------------------------------------------------
89
90    /** @name Getter methods */
91    //@{
92
93    /** Get a pointer to the error handler
94      *
95      * This method returns the installed error handler. If no handler
96      * has been installed, then it will be a zero pointer.
97      *
98      * @return The pointer to the installed error handler object.
99      */
100    ErrorHandler* getErrorHandler();
101
102    /** Get a const pointer to the error handler
103      *
104      * This method returns the installed error handler.  If no handler
105      * has been installed, then it will be a zero pointer.
106      *
107      * @return A const pointer to the installed error handler object.
108      */
109    const ErrorHandler* getErrorHandler() const;
110
111    /** Get a pointer to the entity resolver
112      *
113      * This method returns the installed entity resolver.  If no resolver
114      * has been installed, then it will be a zero pointer.
115      *
116      * @return The pointer to the installed entity resolver object.
117      */
118    EntityResolver* getEntityResolver();
119
120    /** Get a const pointer to the entity resolver
121      *
122      * This method returns the installed entity resolver. If no resolver
123      * has been installed, then it will be a zero pointer.
124      *
125      * @return A const pointer to the installed entity resolver object.
126      */
127    const EntityResolver* getEntityResolver() const;
128
129    /**
130      * Get a pointer to the XML entity resolver
131      *
132      * This method returns the installed XML entity resolver.  If no
133      * resolver has been installed, then it will be a zero pointer.
134      *
135      * @return The pointer to the installed XML entity resolver object.
136      */
137    XMLEntityResolver* getXMLEntityResolver();
138 
139    /**
140      * Get a const pointer to the XML entity resolver
141      *
142      * This method returns the installed XML entity resolver. If no
143      * resolver has been installed, then it will be a zero pointer.
144      *
145      * @return A const pointer to the installed XML entity resolver object.
146      */
147    const XMLEntityResolver* getXMLEntityResolver() const;
148
149    /** Get the 'Grammar caching' flag
150      *
151      * This method returns the state of the parser's grammar caching when
152      * parsing an XML document.
153      *
154      * @return true, if the parser is currently configured to
155      *         cache grammars, false otherwise.
156      *
157      * @see #cacheGrammarFromParse
158      */
159    bool isCachingGrammarFromParse() const;
160
161    /** Get the 'Use cached grammar' flag
162      *
163      * This method returns the state of the parser's use of cached grammar
164      * when parsing an XML document.
165      *
166      * @return true, if the parser is currently configured to
167      *         use cached grammars, false otherwise.
168      *
169      * @see #useCachedGrammarInParse
170      */
171    bool isUsingCachedGrammarInParse() const;
172
173    /**
174     * Retrieve the grammar that is associated with the specified namespace key
175     *
176     * @param  nameSpaceKey Namespace key
177     * @return Grammar associated with the Namespace key.
178     */
179    Grammar* getGrammar(const XMLCh* const nameSpaceKey);
180
181    /**
182     * Retrieve the grammar where the root element is declared.
183     *
184     * @return Grammar where root element declared
185     */
186    Grammar* getRootGrammar();
187
188    /**
189     * Returns the string corresponding to a URI id from the URI string pool.
190     *
191     * @param uriId id of the string in the URI string pool.
192     * @return URI string corresponding to the URI id.
193     */
194    const XMLCh* getURIText(unsigned int uriId) const;
195
196    /**
197     * Returns the current src offset within the input source.
198     *
199     * @return offset within the input source
200     */
201    unsigned int getSrcOffset() const;
202
203    //@}
204
205
206    // -----------------------------------------------------------------------
207    //  Setter methods
208    // -----------------------------------------------------------------------
209
210    /** @name Setter methods */
211    //@{
212
213    /** Set the error handler
214      *
215      * This method allows applications to install their own error handler
216      * to trap error and warning messages.
217      *
218      * <i>Any previously set handler is merely dropped, since the parser
219      * does not own them.</i>
220      *
221      * @param handler  A const pointer to the user supplied error
222      *                 handler.
223      *
224      * @see #getErrorHandler
225      */
226    void setErrorHandler(ErrorHandler* const handler);
227
228    /** Set the entity resolver
229      *
230      * This method allows applications to install their own entity
231      * resolver. By installing an entity resolver, the applications
232      * can trap and potentially redirect references to external
233      * entities.
234      *
235      * <i>Any previously set entity resolver is merely dropped, since the parser
236      * does not own them.  If both setEntityResolver and setXMLEntityResolver
237      * are called, then the last one set is used.</i>
238      *
239      * @param handler  A const pointer to the user supplied entity
240      *                 resolver.
241      *
242      * @see #getEntityResolver
243      */
244    void setEntityResolver(EntityResolver* const handler);
245
246    /**
247      * Set the XML entity resolver
248      *
249      * This method allows applications to install their own XML entity
250      * resolver. By installing an entity resolver, the applications
251      * can trap and potentially redirect references to external
252      * entities.
253      *
254      * <i>Any previously set entity resolver is merely dropped, since the parser
255      * does not own them.  If both setEntityResolver and setXMLEntityResolver
256      * are called, then the last one set is used.</i>
257      *
258      * @param handler  A const pointer to the user supplied XML entity
259      *                 resolver.
260      *
261      * @see #getXMLEntityResolver
262      */
263    void setXMLEntityResolver(XMLEntityResolver* const handler);
264
265    /** Set the 'Grammar caching' flag
266      *
267      * This method allows users to enable or disable caching of grammar when
268      * parsing XML documents. When set to true, the parser will cache the
269      * resulting grammar for use in subsequent parses.
270      *
271      * If the flag is set to true, the 'Use cached grammar' flag will also be
272      * set to true.
273      *
274      * The parser's default state is: false.
275      *
276      * @param newState The value specifying whether we should cache grammars
277      *                 or not.
278      *
279      * @see #isCachingGrammarFromParse
280      * @see #useCachedGrammarInParse
281      */
282    void cacheGrammarFromParse(const bool newState);
283
284    /** Set the 'Use cached grammar' flag
285      *
286      * This method allows users to enable or disable the use of cached
287      * grammars.  When set to true, the parser will use the cached grammar,
288      * instead of building the grammar from scratch, to validate XML
289      * documents.
290      *
291      * If the 'Grammar caching' flag is set to true, this method ignores the
292      * value passed in.
293      *
294      * The parser's default state is: false.
295      *
296      * @param newState The value specifying whether we should use the cached
297      *                 grammar or not.
298      *
299      * @see #isUsingCachedGrammarInParse
300      * @see #cacheGrammarFromParse
301      */
302    void useCachedGrammarInParse(const bool newState);
303
304    //@}
305
306    // -----------------------------------------------------------------------
307    //  Utility methods
308    // -----------------------------------------------------------------------
309
310    /** @name Utility methods */
311    //@{
312    /** Reset the documents vector pool and release all the associated memory
313      * back to the system.
314      *
315      * When parsing a document using a DOM parser, all memory allocated
316      * for a DOM tree is associated to the DOM document.
317      *
318      * If you perform multiple parses using the same DOM parser instance, then
319      * multiple DOM documents will be generated and saved in a vector pool.
320      * All these documents (and thus all the allocated memory)
321      * won't be deleted until the parser instance is destroyed.
322      *
323      * If you don't need these DOM documents anymore and don't want to
324      * destroy the DOM parser instance at this moment, then you can call this method
325      * to reset the document vector pool and release all the allocated memory
326      * back to the system.
327      *
328      * It is an error to call this method if you are in the middle of a
329      * parse (e.g. in the middle of a progressive parse).
330      *
331      * @exception IOException An exception from the parser if this function
332      *            is called when a parse is in progress.
333      *
334      */
335    void resetDocumentPool();
336
337    //@}
338
339    // -----------------------------------------------------------------------
340    //  Implementation of the XMLErrorReporter interface.
341    // -----------------------------------------------------------------------
342
343    /** @name Implementation of the XMLErrorReporter interface. */
344    //@{
345
346    /** Handle errors reported from the parser
347      *
348      * This method is used to report back errors found while parsing the
349      * XML file. This method is also borrowed from the SAX specification.
350      * It calls the corresponding user installed Error Handler method:
351      * 'fatal', 'error', 'warning' depending on the severity of the error.
352      * This classification is defined by the XML specification.
353      *
354      * @param errCode An integer code for the error.
355      * @param msgDomain A const pointer to a Unicode string representing
356      *                  the message domain to use.
357      * @param errType An enumeration classifying the severity of the error.
358      * @param errorText A const pointer to a Unicode string representing
359      *                  the text of the error message.
360      * @param systemId  A const pointer to a Unicode string representing
361      *                  the system id of the XML file where this error
362      *                  was discovered.
363      * @param publicId  A const pointer to a Unicode string representing
364      *                  the public id of the XML file where this error
365      *                  was discovered.
366      * @param lineNum   The line number where the error occurred.
367      * @param colNum    The column number where the error occurred.
368      * @see ErrorHandler
369      */
370    virtual void error
371    (
372        const   unsigned int                errCode
373        , const XMLCh* const                msgDomain
374        , const XMLErrorReporter::ErrTypes  errType
375        , const XMLCh* const                errorText
376        , const XMLCh* const                systemId
377        , const XMLCh* const                publicId
378        , const XMLSSize_t                  lineNum
379        , const XMLSSize_t                  colNum
380    );
381
382    /** Reset any error data before a new parse
383      *
384      * This method allows the user installed Error Handler callback to
385      * 'reset' itself.
386      *
387      * <b>This method is a no-op for this DOM
388      * implementation.</b>
389      */
390    virtual void resetErrors();
391    //@}
392
393
394    // -----------------------------------------------------------------------
395    //  Implementation of the XMLEntityHandler interface.
396    // -----------------------------------------------------------------------
397
398    /** @name Implementation of the XMLEntityHandler interface. */
399    //@{
400
401    /** Handle an end of input source event
402      *
403      * This method is used to indicate the end of parsing of an external
404      * entity file.
405      *
406      * <b>This method is a no-op for this DOM
407      * implementation.</b>
408      *
409      * @param inputSource A const reference to the InputSource object
410      *                    which points to the XML file being parsed.
411      * @see InputSource
412      */
413    virtual void endInputSource(const InputSource& inputSource);
414
415    /** Expand a system id
416      *
417      * This method allows an installed XMLEntityHandler to further
418      * process any system id's of external entities encountered in
419      * the XML file being parsed, such as redirection etc.
420      *
421      * <b>This method always returns 'false'
422      * for this DOM implementation.</b>
423      *
424      * @param systemId  A const pointer to a Unicode string representing
425      *                  the system id scanned by the parser.
426      * @param toFill    A pointer to a buffer in which the application
427      *                  processed system id is stored.
428      * @return 'true', if any processing is done, 'false' otherwise.
429      */
430    virtual bool expandSystemId
431    (
432        const   XMLCh* const    systemId
433        ,       XMLBuffer&      toFill
434    );
435
436    /** Reset any entity handler information
437      *
438      * This method allows the installed XMLEntityHandler to reset
439      * itself.
440      *
441      * <b>This method is a no-op for this DOM
442      * implementation.</b>
443      */
444    virtual void resetEntities();
445
446    /** Resolve a public/system id
447      *
448      * This method allows a user installed entity handler to further
449      * process any pointers to external entities. The applications can
450      * implement 'redirection' via this callback. This method is also
451      * borrowed from the SAX specification.
452      *
453      * @deprecated This method is no longer called (the resolveEntity
454      *             overload taking an XMLResourceIdentifier is called instead).
455      * @param publicId A const pointer to a Unicode string representing the
456      *                 public id of the entity just parsed.
457      * @param systemId A const pointer to a Unicode string representing the
458      *                 system id of the entity just parsed.
459      * @param baseURI  A const pointer to a Unicode string representing the
460      *                 base URI of the entity just parsed,
461      *                 or <code>null</code> if there is no base URI.
462      * @return The value returned by the user installed resolveEntity
463      *         method or NULL otherwise to indicate no processing was done.
464      *         The returned InputSource is owned by the parser which is
465      *         responsible to clean up the memory.
466      * @see DOMEntityResolver
467      * @see XMLEntityHandler
468      */
469    virtual InputSource* resolveEntity
470    (
471        const   XMLCh* const    publicId
472        , const XMLCh* const    systemId
473        , const XMLCh* const    baseURI = 0
474    );
475
476    /** Resolve a public/system id
477      *
478      * This method allows a user installed entity handler to further
479      * process any pointers to external entities. The applications can
480      * implement 'redirection' via this callback.
481      *
482      * @param resourceIdentifier An object containing the type of
483      *        resource to be resolved and the associated data members
484      *        corresponding to this type.
485      * @return The value returned by the user installed resolveEntity
486      *         method or NULL otherwise to indicate no processing was done.
487      *         The returned InputSource is owned by the parser which is
488      *         responsible to clean up the memory.
489      * @see XMLEntityHandler
490      * @see XMLEntityResolver
491      */
492    virtual InputSource* resolveEntity
493    (
494        XMLResourceIdentifier* resourceIdentifier
495    );
496
497    /** Handle a 'start input source' event
498      *
499      * This method is used to indicate the start of parsing an external
500      * entity file.
501      *
502      * <b>This method is a no-op for this DOM parser
503      * implementation.</b>
504      *
505      * @param inputSource A const reference to the InputSource object
506      *                    which points to the external entity
507      *                    being parsed.
508      */
509    virtual void startInputSource(const InputSource& inputSource);
510
511    //@}
512
513    // -----------------------------------------------------------------------
514    //  Grammar preparsing interface
515    // -----------------------------------------------------------------------
516
517    /** @name Implementation of the Grammar preparsing interface. */
518    //@{
519    /**
520      * Preparse schema grammar (XML Schema, DTD, etc.) via an input source
521      * object.
522      *
523      * This method invokes the preparsing process on a schema grammar XML
524      * file specified by the SAX InputSource parameter. If the 'toCache' flag
525      * is enabled, the parser will cache the grammars for re-use. If a grammar
526      * key is found in the pool, no caching of any grammar will take place.
527      *
528      * <p><b>"Experimental - subject to change"</b></p>
529      *
530      * @param source A const reference to the SAX InputSource object which
531      *               points to the schema grammar file to be preparsed.
532      * @param grammarType The grammar type (Schema or DTD).
533      * @param toCache If <code>true</code>, we cache the preparsed grammar,
534      *                otherwise, no caching. Default is <code>false</code>.
535      * @return The preparsed schema grammar object (SchemaGrammar or
536      *         DTDGrammar). That grammar object is owned by the parser.
537      *
538      * @exception SAXException Any SAX exception, possibly
539      *            wrapping another exception.
540      * @exception XMLException An exception from the parser or client
541      *            handler code.
542      * @exception DOMException A DOM exception as per DOM spec.
543      *
544      * @see InputSource#InputSource
545      */
546    Grammar* loadGrammar(const InputSource& source,
547                         const short grammarType,
548                         const bool toCache = false);
549
550    /**
551      * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
552      *
553      * This method invokes the preparsing process on a schema grammar XML
554      * file specified by the file path parameter. If the 'toCache' flag
555      * is enabled, the parser will cache the grammars for re-use. If a grammar
556      * key is found in the pool, no caching of any grammar will take place.
557      *
558      * <p><b>"Experimental - subject to change"</b></p>
559      *
560      * @param systemId A const XMLCh pointer to the Unicode string which
561      *                 contains the path to the XML grammar file to be
562      *                 preparsed.
563      * @param grammarType The grammar type (Schema or DTD).
564      * @param toCache If <code>true</code>, we cache the preparsed grammar,
565      *                otherwise, no caching. Default is <code>false</code>.
566      * @return The preparsed schema grammar object (SchemaGrammar or
567      *         DTDGrammar). That grammar object is owned by the parser.
568      *
569      * @exception SAXException Any SAX exception, possibly
570      *            wrapping another exception.
571      * @exception XMLException An exception from the parser or client
572      *            handler code.
573      * @exception DOMException A DOM exception as per DOM spec.
574      */
575    Grammar* loadGrammar(const XMLCh* const systemId,
576                         const short grammarType,
577                         const bool toCache = false);
578
579    /**
580      * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
581      *
582      * This method invokes the preparsing process on a schema grammar XML
583      * file specified by the file path parameter. If the 'toCache' flag
584      * is enabled, the parser will cache the grammars for re-use. If a grammar
585      * key is found in the pool, no caching of any grammar will take place.
586      *
587      * <p><b>"Experimental - subject to change"</b></p>
588      *
589      * @param systemId A const char pointer to a native string which contains
590      *                 the path to the XML grammar file to be preparsed.
591      * @param grammarType The grammar type (Schema or DTD).
592      * @param toCache If <code>true</code>, we cache the preparsed grammar,
593      *                otherwise, no caching. Default is <code>false</code>.
594      * @return The preparsed schema grammar object (SchemaGrammar or
595      *         DTDGrammar). That grammar object is owned by the parser.
596      *
597      * @exception SAXException Any SAX exception, possibly
598      *            wrapping another exception.
599      * @exception XMLException An exception from the parser or client
600      *            handler code.
601      * @exception DOMException A DOM exception as per DOM spec.
602      */
603    Grammar* loadGrammar(const char* const systemId,
604                         const short grammarType,
605                         const bool toCache = false);
606
607    /**
608      * This method allows the user to reset the pool of cached grammars.
609      */
610    void resetCachedGrammarPool();
611
612    //@}
613
614
615private :
616    // -----------------------------------------------------------------------
617    //  Unimplemented constructors and operators
618    // -----------------------------------------------------------------------
619    XercesDOMParser(const XercesDOMParser&);
620    XercesDOMParser& operator=(const XercesDOMParser&);
621
622    // -----------------------------------------------------------------------
623    //  Private data members
624    //
625    //  fEntityResolver
626    //      The installed SAX entity resolver, if any. Null if none.
627    //
    //  fXMLEntityResolver
    //      The installed XML entity resolver, if any. Null if none.
    //
628    //  fErrorHandler
629    //      The installed SAX error handler, if any. Null if none.
630    //-----------------------------------------------------------------------
631    EntityResolver*          fEntityResolver;
632    XMLEntityResolver*       fXMLEntityResolver;
633    ErrorHandler*            fErrorHandler;
634};
635
636
637
638// ---------------------------------------------------------------------------
639//  XercesDOMParser: Handlers for the XMLEntityHandler interface
640// ---------------------------------------------------------------------------
// End-of-input-source notification. The body is intentionally empty and the
// InputSource argument is left unnamed because it is never read.
641inline void XercesDOMParser::endInputSource(const InputSource&)
642{
643    // Nothing to do: this parser takes no action on this event
644}
645
// System id expansion hook. Neither argument is read (both are unnamed) and
// the result is always false.
646inline bool XercesDOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
647{
648    // No expansion is performed here; the output buffer is left untouched
649    return false;
650}
651
// Entity-handler reset hook. Intentionally empty.
652inline void XercesDOMParser::resetEntities()
653{
654    // Nothing to do on this one
655}
656
// Start-of-input-source notification. The body is intentionally empty and the
// InputSource argument is left unnamed because it is never read.
657inline void XercesDOMParser::startInputSource(const InputSource&)
658{
659    // Nothing to do: this parser takes no action on this event
660}
661
662
663// ---------------------------------------------------------------------------
664//  XercesDOMParser: Getter methods
665// ---------------------------------------------------------------------------
// Non-const accessor: returns the stored fErrorHandler pointer as-is.
666inline ErrorHandler* XercesDOMParser::getErrorHandler()
667{
668    return fErrorHandler;
669}
670
// Const accessor: returns the stored fErrorHandler pointer as pointer-to-const.
671inline const ErrorHandler* XercesDOMParser::getErrorHandler() const
672{
673    return fErrorHandler;
674}
675
// Non-const accessor: returns the stored fEntityResolver pointer as-is.
676inline EntityResolver* XercesDOMParser::getEntityResolver()
677{
678    return fEntityResolver;
679}
680
// Const accessor: returns the stored fEntityResolver pointer as pointer-to-const.
681inline const EntityResolver* XercesDOMParser::getEntityResolver() const
682{
683    return fEntityResolver;
684}
685
// Non-const accessor: returns the stored fXMLEntityResolver pointer as-is.
686inline XMLEntityResolver* XercesDOMParser::getXMLEntityResolver()
687{
688    return fXMLEntityResolver;
689}
690
// Const accessor: returns the stored fXMLEntityResolver pointer as pointer-to-const.
691inline const XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() const
692{
693    return fXMLEntityResolver;
694}
695
696XERCES_CPP_NAMESPACE_END
697
698#endif
Note: See TracBrowser for help on using the repository browser.