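// source: NonGTP/Xerces/xerces-c_2_8_0/include/xercesc/parsers/XercesDOMParser.hpp @ 2674
//
// Revision 2674, 26.5 KB checked in by mattausch, 16 years ago
// (Text captured from a Trac repository-browser page; the viewer's line-number column is not part of the file.)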
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * $Id: XercesDOMParser.hpp 568078 2007-08-21 11:43:25Z amassari $
 *
 */

23#if !defined(XercesDOMParser_HPP)
24#define XercesDOMParser_HPP
25
26
27#include <xercesc/parsers/AbstractDOMParser.hpp>
28
29XERCES_CPP_NAMESPACE_BEGIN
30
31
32class EntityResolver;
33class ErrorHandler;
34class Grammar;
35class XMLEntityResolver;
36class XMLResourceIdentifier;
37
38 /**
39  * This class implements the Document Object Model (DOM) interface.
40  * It should be used by applications which choose to parse and
41  * process the XML document using the DOM api's. This implementation
42  * also allows the applications to install an error and an entitty
43  * handler (useful extensions to the DOM specification).
44  *
45  * <p>It can be used to instantiate a validating or non-validating
46  * parser, by setting a member flag.</p>
47  */
48class PARSERS_EXPORT XercesDOMParser : public AbstractDOMParser
49{
50public :
51    // -----------------------------------------------------------------------
52    //  Constructors and Detructor
53    // -----------------------------------------------------------------------
54
55    /** @name Constructors and Destructor */
56    //@{
57    /** Construct a XercesDOMParser, with an optional validator
58      *
59      * Constructor with an instance of validator class to use for
60      * validation. If you don't provide a validator, a default one will
61      * be created for you in the scanner.
62      *
63      * @param gramPool   Pointer to the grammar pool instance from
64      *                   external application.
65      *                   The parser does NOT own it.
66      *
67      * @param valToAdopt Pointer to the validator instance to use. The
68      *                   parser is responsible for freeing the memory.
69      * @param  manager   Pointer to the memory manager to be used to
70      *                   allocate objects.
71      */
72    XercesDOMParser
73    (
74          XMLValidator* const   valToAdopt = 0
75        , MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
76        , XMLGrammarPool* const gramPool = 0       
77    );
78
79    /**
80      * Destructor
81      */
82    virtual ~XercesDOMParser();
83
84    //@}
85
86
87    // -----------------------------------------------------------------------
88    //  Getter methods
89    // -----------------------------------------------------------------------
90
91    /** @name Getter methods */
92    //@{
93
94    /** Get a pointer to the error handler
95      *
96      * This method returns the installed error handler. If no handler
97      * has been installed, then it will be a zero pointer.
98      *
99      * @return The pointer to the installed error handler object.
100      */
101    ErrorHandler* getErrorHandler();
102
103    /** Get a const pointer to the error handler
104      *
105      * This method returns the installed error handler.  If no handler
106      * has been installed, then it will be a zero pointer.
107      *
108      * @return A const pointer to the installed error handler object.
109      */
110    const ErrorHandler* getErrorHandler() const;
111
112    /** Get a pointer to the entity resolver
113      *
114      * This method returns the installed entity resolver.  If no resolver
115      * has been installed, then it will be a zero pointer.
116      *
117      * @return The pointer to the installed entity resolver object.
118      */
119    EntityResolver* getEntityResolver();
120
121    /** Get a const pointer to the entity resolver
122      *
123      * This method returns the installed entity resolver. If no resolver
124      * has been installed, then it will be a zero pointer.
125      *
126      * @return A const pointer to the installed entity resolver object.
127      */
128    const EntityResolver* getEntityResolver() const;
129
130    /**
131      * Get a pointer to the entity resolver
132      *
133      * This method returns the installed entity resolver.  If no resolver
134      * has been installed, then it will be a zero pointer.
135      *
136      * @return The pointer to the installed entity resolver object.
137      */
138    XMLEntityResolver* getXMLEntityResolver();
139 
140    /**
141      * Get a const pointer to the entity resolver
142      *
143      * This method returns the installed entity resolver. If no resolver
144      * has been installed, then it will be a zero pointer.
145      *
146      * @return A const pointer to the installed entity resolver object.
147      */
148    const XMLEntityResolver* getXMLEntityResolver() const;
149
150    /** Get the 'Grammar caching' flag
151      *
152      * This method returns the state of the parser's grammar caching when
153      * parsing an XML document.
154      *
155      * @return true, if the parser is currently configured to
156      *         cache grammars, false otherwise.
157      *
158      * @see #cacheGrammarFromParse
159      */
160    bool isCachingGrammarFromParse() const;
161
162    /** Get the 'Use cached grammar' flag
163      *
164      * This method returns the state of the parser's use of cached grammar
165      * when parsing an XML document.
166      *
167      * @return true, if the parser is currently configured to
168      *         use cached grammars, false otherwise.
169      *
170      * @see #useCachedGrammarInParse
171      */
172    bool isUsingCachedGrammarInParse() const;
173
174    /**
175     * Retrieve the grammar that is associated with the specified namespace key
176     *
177     * @param  nameSpaceKey Namespace key
178     * @return Grammar associated with the Namespace key.
179     */
180    Grammar* getGrammar(const XMLCh* const nameSpaceKey);
181
182    /**
183     * Retrieve the grammar where the root element is declared.
184     *
185     * @return Grammar where root element declared
186     */
187    Grammar* getRootGrammar();
188
189    /**
190     * Returns the string corresponding to a URI id from the URI string pool.
191     *
192     * @param uriId id of the string in the URI string pool.
193     * @return URI string corresponding to the URI id.
194     */
195    const XMLCh* getURIText(unsigned int uriId) const;
196
197    /**
198     * Returns the current src offset within the input source.
199     * To be used only while parsing is in progress.
200     *
201     * @return offset within the input source
202     */
203    unsigned int getSrcOffset() const;
204
205    /** Get the 'ignore cached DTD grammar' flag
206      *   
207      * @return true, if the parser is currently configured to
208      *         ignore cached DTD, false otherwise.
209      *
210      * @see #setIgnoreCachedDTD
211      */
212    bool getIgnoreCachedDTD() const;
213
214    //@}
215
216
217    // -----------------------------------------------------------------------
218    //  Setter methods
219    // -----------------------------------------------------------------------
220
221    /** @name Setter methods */
222    //@{
223
224    /** Set the error handler
225      *
226      * This method allows applications to install their own error handler
227      * to trap error and warning messages.
228      *
229      * <i>Any previously set handler is merely dropped, since the parser
230      * does not own them.</i>
231      *
232      * @param handler  A const pointer to the user supplied error
233      *                 handler.
234      *
235      * @see #getErrorHandler
236      */
237    void setErrorHandler(ErrorHandler* const handler);
238
239    /** Set the entity resolver
240      *
241      * This method allows applications to install their own entity
242      * resolver. By installing an entity resolver, the applications
243      * can trap and potentially redirect references to external
244      * entities.
245      *
246      * <i>Any previously set entity resolver is merely dropped, since the parser
247      * does not own them.  If both setEntityResolver and setXMLEntityResolver
248      * are called, then the last one is used.</i>
249      *
250      * @param handler  A const pointer to the user supplied entity
251      *                 resolver.
252      *
253      * @see #getEntityResolver
254      */
255    void setEntityResolver(EntityResolver* const handler);
256
257    /**
258      * Set the entity resolver
259      *
260      * This method allows applications to install their own entity
261      * resolver. By installing an entity resolver, the applications
262      * can trap and potentially redirect references to external
263      * entities.
264      *
265      * <i>Any previously set entity resolver is merely dropped, since the parser
266      * does not own them.  If both setEntityResolver and setXMLEntityResolver
267      * are called, then the last one set is used.</i>
268      *
269      * @param handler  A const pointer to the user supplied entity
270      *                 resolver.
271      *
272      * @see #getXMLEntityResolver
273      */
274    void setXMLEntityResolver(XMLEntityResolver* const handler);
275
276    /** Set the 'Grammar caching' flag
277      *
278      * This method allows users to enable or disable caching of grammar when
279      * parsing XML documents. When set to true, the parser will cache the
280      * resulting grammar for use in subsequent parses.
281      *
282      * If the flag is set to true, the 'Use cached grammar' flag will also be
283      * set to true.
284      *
285      * The parser's default state is: false.
286      *
287      * @param newState The value specifying whether we should cache grammars
288      *                 or not.
289      *
290      * @see #isCachingGrammarFromParse
291      * @see #useCachedGrammarInParse
292      */
293    void cacheGrammarFromParse(const bool newState);
294
295    /** Set the 'Use cached grammar' flag
296      *
297      * This method allows users to enable or disable the use of cached
298      * grammars.  When set to true, the parser will use the cached grammar,
299      * instead of building the grammar from scratch, to validate XML
300      * documents.
301      *
302      * If the 'Grammar caching' flag is set to true, this mehod ignore the
303      * value passed in.
304      *
305      * The parser's default state is: false.
306      *
307      * @param newState The value specifying whether we should use the cached
308      *                 grammar or not.
309      *
310      * @see #isUsingCachedGrammarInParse
311      * @see #cacheGrammarFromParse
312      */
313    void useCachedGrammarInParse(const bool newState);
314
315    /** Set the 'ignore cached DTD grammar' flag
316      *
317      * This method gives users the option to ignore a cached DTD grammar, when
318      * an XML document contains both an internal and external DTD, and the use
319      * cached grammar from parse option is enabled. Currently, we do not allow
320      * using cached DTD grammar when an internal subset is present in the
321      * document. This option will only affect the behavior of the parser when
322      * an internal and external DTD both exist in a document (i.e. no effect
323      * if document has no internal subset).
324      *
325      * The parser's default state is false
326      *
327      * @param newValue The state to set
328      */
329    void setIgnoreCachedDTD(const bool newValue);
330
331    //@}
332
333    // -----------------------------------------------------------------------
334    //  Utility methods
335    // -----------------------------------------------------------------------
336
337    /** @name Utility methods */
338    //@{
339    /** Reset the documents vector pool and release all the associated memory
340      * back to the system.
341      *
342      * When parsing a document using a DOM parser, all memory allocated
343      * for a DOM tree is associated to the DOM document.
344      *
345      * If you do multiple parse using the same DOM parser instance, then
346      * multiple DOM documents will be generated and saved in a vector pool.
347      * All these documents (and thus all the allocated memory)
348      * won't be deleted until the parser instance is destroyed.
349      *
350      * If you don't need these DOM documents anymore and don't want to
351      * destroy the DOM parser instance at this moment, then you can call this method
352      * to reset the document vector pool and release all the allocated memory
353      * back to the system.
354      *
355      * It is an error to call this method if you are in the middle of a
356      * parse (e.g. in the mid of a progressive parse).
357      *
358      * @exception IOException An exception from the parser if this function
359      *            is called when a parse is in progress.
360      *
361      */
362    void resetDocumentPool();
363
364    //@}
365
366    // -----------------------------------------------------------------------
367    //  Implementation of the XMLErrorReporter interface.
368    // -----------------------------------------------------------------------
369
370    /** @name Implementation of the XMLErrorReporter interface. */
371    //@{
372
373    /** Handle errors reported from the parser
374      *
375      * This method is used to report back errors found while parsing the
376      * XML file. This method is also borrowed from the SAX specification.
377      * It calls the corresponding user installed Error Handler method:
378      * 'fatal', 'error', 'warning' depending on the severity of the error.
379      * This classification is defined by the XML specification.
380      *
381      * @param errCode An integer code for the error.
382      * @param msgDomain A const pointer to an Unicode string representing
383      *                  the message domain to use.
384      * @param errType An enumeration classifying the severity of the error.
385      * @param errorText A const pointer to an Unicode string representing
386      *                  the text of the error message.
387      * @param systemId  A const pointer to an Unicode string representing
388      *                  the system id of the XML file where this error
389      *                  was discovered.
390      * @param publicId  A const pointer to an Unicode string representing
391      *                  the public id of the XML file where this error
392      *                  was discovered.
393      * @param lineNum   The line number where the error occurred.
394      * @param colNum    The column number where the error occurred.
395      * @see ErrorHandler
396      */
397    virtual void error
398    (
399        const   unsigned int                errCode
400        , const XMLCh* const                msgDomain
401        , const XMLErrorReporter::ErrTypes  errType
402        , const XMLCh* const                errorText
403        , const XMLCh* const                systemId
404        , const XMLCh* const                publicId
405        , const XMLSSize_t                  lineNum
406        , const XMLSSize_t                  colNum
407    );
408
409    /** Reset any error data before a new parse
410     *
411      * This method allows the user installed Error Handler callback to
412      * 'reset' itself.
413      *
414      * <b>This method is a no-op for this DOM
415      * implementation.</b>
416      */
417    virtual void resetErrors();
418    //@}
419
420
421    // -----------------------------------------------------------------------
422    //  Implementation of the XMLEntityHandler interface.
423    // -----------------------------------------------------------------------
424
425    /** @name Implementation of the XMLEntityHandler interface. */
426    //@{
427
428    /** Handle an end of input source event
429      *
430      * This method is used to indicate the end of parsing of an external
431      * entity file.
432      *
433      * <b>This method is a no-op for this DOM
434      * implementation.</b>
435      *
436      * @param inputSource A const reference to the InputSource object
437      *                    which points to the XML file being parsed.
438      * @see InputSource
439      */
440    virtual void endInputSource(const InputSource& inputSource);
441
442    /** Expand a system id
443      *
444      * This method allows an installed XMLEntityHandler to further
445      * process any system id's of enternal entities encountered in
446      * the XML file being parsed, such as redirection etc.
447      *
448      * <b>This method always returns 'false'
449      * for this DOM implementation.</b>
450      *
451      * @param systemId  A const pointer to an Unicode string representing
452      *                  the system id scanned by the parser.
453      * @param toFill    A pointer to a buffer in which the application
454      *                  processed system id is stored.
455      * @return 'true', if any processing is done, 'false' otherwise.
456      */
457    virtual bool expandSystemId
458    (
459        const   XMLCh* const    systemId
460        ,       XMLBuffer&      toFill
461    );
462
463    /** Reset any entity handler information
464      *
465      * This method allows the installed XMLEntityHandler to reset
466      * itself.
467      *
468      * <b>This method is a no-op for this DOM
469      * implementation.</b>
470      */
471    virtual void resetEntities();
472
473    /** Resolve a public/system id
474      *
475      * This method allows a user installed entity handler to further
476      * process any pointers to external entities. The applications can
477      * implement 'redirection' via this callback. This method is also
478      * borrowed from the SAX specification.
479      *
480      * @deprecated This method is no longer called (the other resolveEntity one is).
481      *
482      * @param publicId A const pointer to a Unicode string representing the
483      *                 public id of the entity just parsed.
484      * @param systemId A const pointer to a Unicode string representing the
485      *                 system id of the entity just parsed.
486      * @param baseURI  A const pointer to a Unicode string representing the
487      *                 base URI of the entity just parsed,
488      *                 or <code>null</code> if there is no base URI.
489      * @return The value returned by the user installed resolveEntity
490      *         method or NULL otherwise to indicate no processing was done.
491      *         The returned InputSource is owned by the parser which is
492      *         responsible to clean up the memory.
493      * @see DOMEntityResolver
494      * @see XMLEntityHandler
495      */
496    virtual InputSource* resolveEntity
497    (
498        const   XMLCh* const    publicId
499        , const XMLCh* const    systemId
500        , const XMLCh* const    baseURI = 0
501    );
502
503    /** Resolve a public/system id
504      *
505      * This method allows a user installed entity handler to further
506      * process any pointers to external entities. The applications can
507      * implement 'redirection' via this callback. 
508      *
509      * @param resourceIdentifier An object containing the type of
510      *        resource to be resolved and the associated data members
511      *        corresponding to this type.
512      * @return The value returned by the user installed resolveEntity
513      *         method or NULL otherwise to indicate no processing was done.
514      *         The returned InputSource is owned by the parser which is
515      *         responsible to clean up the memory.
516      * @see XMLEntityHandler
517      * @see XMLEntityResolver
518      */
519    virtual InputSource* resolveEntity
520    (
521        XMLResourceIdentifier* resourceIdentifier
522    );
523
524    /** Handle a 'start input source' event
525      *
526      * This method is used to indicate the start of parsing an external
527      * entity file.
528      *
529      * <b>This method is a no-op for this DOM parse
530      * implementation.</b>
531      *
532      * @param inputSource A const reference to the InputSource object
533      *                    which points to the external entity
534      *                    being parsed.
535      */
536    virtual void startInputSource(const InputSource& inputSource);
537
538    //@}
539
540    // -----------------------------------------------------------------------
541    //  Grammar preparsing interface
542    // -----------------------------------------------------------------------
543
544    /** @name Implementation of Grammar preparsing interface's. */
545    //@{
546    /**
547      * Preparse schema grammar (XML Schema, DTD, etc.) via an input source
548      * object.
549      *
550      * This method invokes the preparsing process on a schema grammar XML
551      * file specified by the SAX InputSource parameter. If the 'toCache' flag
552      * is enabled, the parser will cache the grammars for re-use. If a grammar
553      * key is found in the pool, no caching of any grammar will take place.
554      *
555      * <p><b>"Experimental - subject to change"</b></p>
556      *
557      * @param source A const reference to the SAX InputSource object which
558      *               points to the schema grammar file to be preparsed.
559      * @param grammarType The grammar type (Schema or DTD).
560      * @param toCache If <code>true</code>, we cache the preparsed grammar,
561      *                otherwise, no chaching. Default is <code>false</code>.
562      * @return The preparsed schema grammar object (SchemaGrammar or
563      *         DTDGrammar). That grammar object is owned by the parser.
564      *
565      * @exception SAXException Any SAX exception, possibly
566      *            wrapping another exception.
567      * @exception XMLException An exception from the parser or client
568      *            handler code.
569      * @exception DOMException A DOM exception as per DOM spec.
570      *
571      * @see InputSource#InputSource
572      */
573    Grammar* loadGrammar(const InputSource& source,
574                         const short grammarType,
575                         const bool toCache = false);
576
577    /**
578      * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
579      *
580      * This method invokes the preparsing process on a schema grammar XML
581      * file specified by the file path parameter. If the 'toCache' flag
582      * is enabled, the parser will cache the grammars for re-use. If a grammar
583      * key is found in the pool, no caching of any grammar will take place.
584      *
585      * <p><b>"Experimental - subject to change"</b></p>
586      *
587      * @param systemId A const XMLCh pointer to the Unicode string which
588      *                 contains the path to the XML grammar file to be
589      *                 preparsed.
590      * @param grammarType The grammar type (Schema or DTD).
591      * @param toCache If <code>true</code>, we cache the preparsed grammar,
592      *                otherwise, no chaching. Default is <code>false</code>.
593      * @return The preparsed schema grammar object (SchemaGrammar or
594      *         DTDGrammar). That grammar object is owned by the parser.
595      *
596      * @exception SAXException Any SAX exception, possibly
597      *            wrapping another exception.
598      * @exception XMLException An exception from the parser or client
599      *            handler code.
600      * @exception DOMException A DOM exception as per DOM spec.
601      */
602    Grammar* loadGrammar(const XMLCh* const systemId,
603                         const short grammarType,
604                         const bool toCache = false);
605
606    /**
607      * Preparse schema grammar (XML Schema, DTD, etc.) via a file path or URL
608      *
609      * This method invokes the preparsing process on a schema grammar XML
610      * file specified by the file path parameter. If the 'toCache' flag
611      * is enabled, the parser will cache the grammars for re-use. If a grammar
612      * key is found in the pool, no caching of any grammar will take place.
613      *
614      * <p><b>"Experimental - subject to change"</b></p>
615      *
616      * @param systemId A const char pointer to a native string which contains
617      *                 the path to the XML grammar file to be preparsed.
618      * @param grammarType The grammar type (Schema or DTD).
619      * @param toCache If <code>true</code>, we cache the preparsed grammar,
620      *                otherwise, no chaching. Default is <code>false</code>.
621      * @return The preparsed schema grammar object (SchemaGrammar or
622      *         DTDGrammar). That grammar object is owned by the parser.
623      *
624      * @exception SAXException Any SAX exception, possibly
625      *            wrapping another exception.
626      * @exception XMLException An exception from the parser or client
627      *            handler code.
628      * @exception DOMException A DOM exception as per DOM spec.
629      */
630    Grammar* loadGrammar(const char* const systemId,
631                         const short grammarType,
632                         const bool toCache = false);
633
634    /**
635      * This method allows the user to reset the pool of cached grammars.
636      */
637    void resetCachedGrammarPool();
638
639    //@}
640
641
642private :
643    // -----------------------------------------------------------------------
644    //  Initialize/Cleanup methods
645    // -----------------------------------------------------------------------
646    void resetParse();
647
648    // -----------------------------------------------------------------------
649    //  Unimplemented constructors and operators
650    // -----------------------------------------------------------------------
651    XercesDOMParser(const XercesDOMParser&);
652    XercesDOMParser& operator=(const XercesDOMParser&);
653
654    // -----------------------------------------------------------------------
655    //  Private data members
656    //
657    //  fEntityResolver
658    //      The installed SAX entity resolver, if any. Null if none.
659    //
660    //  fErrorHandler
661    //      The installed SAX error handler, if any. Null if none.
662    //-----------------------------------------------------------------------
663    EntityResolver*          fEntityResolver;
664    XMLEntityResolver*       fXMLEntityResolver;
665    ErrorHandler*            fErrorHandler;
666};
667
668
669
670// ---------------------------------------------------------------------------
671//  XercesDOMParser: Handlers for the XMLEntityHandler interface
672// ---------------------------------------------------------------------------
673inline void XercesDOMParser::endInputSource(const InputSource&)
674{
675    // The DOM entity resolver doesn't handle this
676}
677
678inline bool XercesDOMParser::expandSystemId(const XMLCh* const, XMLBuffer&)
679{
680    // The DOM entity resolver doesn't handle this
681    return false;
682}
683
684inline void XercesDOMParser::resetEntities()
685{
686    // Nothing to do on this one
687}
688
689inline void XercesDOMParser::startInputSource(const InputSource&)
690{
691    // The DOM entity resolver doesn't handle this
692}
693
694
695// ---------------------------------------------------------------------------
696//  XercesDOMParser: Getter methods
697// ---------------------------------------------------------------------------
698inline ErrorHandler* XercesDOMParser::getErrorHandler()
699{
700    return fErrorHandler;
701}
702
703inline const ErrorHandler* XercesDOMParser::getErrorHandler() const
704{
705    return fErrorHandler;
706}
707
708inline EntityResolver* XercesDOMParser::getEntityResolver()
709{
710    return fEntityResolver;
711}
712
713inline const EntityResolver* XercesDOMParser::getEntityResolver() const
714{
715    return fEntityResolver;
716}
717
718inline XMLEntityResolver* XercesDOMParser::getXMLEntityResolver()
719{
720    return fXMLEntityResolver;
721}
722
723inline const XMLEntityResolver* XercesDOMParser::getXMLEntityResolver() const
724{
725    return fXMLEntityResolver;
726}
727
728XERCES_CPP_NAMESPACE_END
729
730#endif
// Note: See TracBrowser for help on using the repository browser.