/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: XMLDocumentHandler.hpp 568078 2007-08-21 11:43:25Z amassari $ */ #if !defined(XMLDOCUMENTHANDLER_HPP) #define XMLDOCUMENTHANDLER_HPP #include #include #include XERCES_CPP_NAMESPACE_BEGIN class XMLElementDecl; class XMLEntityDecl; /** * This abstract class provides the interface for the scanner to return * XML document information up to the parser as it scans through the * document. * * The interface is very similar to org.sax.DocumentHandler, but * has some extra methods required to get all the data out. */ class XMLPARSER_EXPORT XMLDocumentHandler { public: // ----------------------------------------------------------------------- // Constructors are hidden, just the virtual destructor is exposed // ----------------------------------------------------------------------- /** @name Destructor */ //@{ virtual ~XMLDocumentHandler() { } //@} /** @name The document handler interface */ //@{ /** Receive notification of character data. * *

The scanner will call this method to report each chunk of * character data. The scanner may return all contiguous character * data in a single chunk, or they may split it into several * chunks; however, all of the characters in any single event * will come from the same external entity, so that the Locator * provides useful information.

* *

The parser must not attempt to read from the array * outside of the specified range.

* * @param chars The content (characters) between markup from the XML * document. * @param length The number of characters to read from the array. * @param cdataSection Indicates that this data is inside a CDATA * section. * @see #ignorableWhitespace * @see Locator */ virtual void docCharacters ( const XMLCh* const chars , const unsigned int length , const bool cdataSection ) = 0; /** Receive notification of comments in the XML content being parsed. * * This scanner will call this method for any comments found in the * content of the document. * * @param comment The text of the comment. */ virtual void docComment ( const XMLCh* const comment ) = 0; /** Receive notification of PI's parsed in the XML content. * * The scanner will call this method for any PIs it finds within the * content of the document. * * @param target The name of the PI. * @param data The body of the PI. This may be an empty string since * the body is optional. */ virtual void docPI ( const XMLCh* const target , const XMLCh* const data ) = 0; /** Receive notification after the scanner has parsed the end of the * document. * * The scanner will call this method when the current document has been * fully parsed. The handler may use this opportunity to do something with * the data, clean up temporary data, etc... */ virtual void endDocument() = 0; /** Receive notification of the end of an element. * * This method is called when scanner encounters the end of element tag. * There will be a corresponding startElement() event for every * endElement() event, but not necessarily the other way around. For * empty tags, there is only a startElement() call. * * @param elemDecl The name of the element whose end tag was just * parsed. * @param uriId The ID of the URI in the URI pool (only valid if * name spaces is enabled) * @param isRoot Indicates if this is the root element. * @param prefixName The string representing the prefix name */ virtual void endElement ( const XMLElementDecl& elemDecl , const unsigned int uriId , const bool isRoot , const XMLCh* const prefixName = 0 ) = 0; /** Receive notification when a referenced entity's content ends * * This method is called when scanner encounters the end of an entity * reference. * * @param entDecl The name of the entity reference just scanned. */ virtual void endEntityReference ( const XMLEntityDecl& entDecl ) = 0; /** Receive notification of ignorable whitespace in element content. * *

Validating Parsers must use this method to report each chunk * of ignorable whitespace (see the W3C XML 1.0 recommendation, * section 2.10): non-validating parsers may also use this method * if they are capable of parsing and using content models.

* *

The scanner may return all contiguous whitespace in a single * chunk, or it may split it into several chunks; however, all of * the characters in any single event will come from the same * external entity, so that the Locator provides useful * information.

* *

The parser must not attempt to read from the array * outside of the specified range.

* * @param chars The whitespace characters from the XML document. * @param length The number of characters to read from the array. * @param cdataSection Indicates that this data is inside a CDATA * section. * @see #characters */ virtual void ignorableWhitespace ( const XMLCh* const chars , const unsigned int length , const bool cdataSection ) = 0; /** Reset the document handler's state, if required * * This method is used to give the registered document handler a * chance to reset itself. Its called by the scanner at the start of * every parse. */ virtual void resetDocument() = 0; /** Receive notification of the start of a new document * * This method is the first callback called the scanner at the * start of every parse. This is before any content is parsed. */ virtual void startDocument() = 0; /** Receive notification of a new start tag * * This method is called when scanner encounters the start of an element tag. * All elements must always have a startElement() tag. Empty tags will * only have the startElement() tag and no endElement() tag. * * @param elemDecl The name of the element whose start tag was just * parsed. * @param uriId The ID of the URI in the URI pool (only valid if * name spaces is enabled) * @param prefixName The string representing the prefix name * @param attrList List of attributes in the element * @param attrCount Count of the attributes in the element * @param isEmpty Indicates if the element is empty, in which case * you should not expect an endElement() event. * @param isRoot Indicates if this is the root element. */ virtual void startElement ( const XMLElementDecl& elemDecl , const unsigned int uriId , const XMLCh* const prefixName , const RefVectorOf& attrList , const unsigned int attrCount , const bool isEmpty , const bool isRoot ) = 0; /** Receive notification when the scanner hits an entity reference. * * This is currently useful only to DOM parser configurations as SAX * does not provide any api to return this information. * * @param entDecl The name of the entity that was referenced. */ virtual void startEntityReference(const XMLEntityDecl& entDecl) = 0; /** Receive notification of an XML declaration * * Currently neither DOM nor SAX provide API's to return back this * information. * * @param versionStr The value of the version pseudoattribute * of the XML decl. * @param encodingStr The value of the encoding pseudoattribute * of the XML decl. * @param standaloneStr The value of the standalone * pseudoattribute of the XML decl. * @param autoEncodingStr The encoding string auto-detected by the * scanner. In absence of any 'encoding' attribute in the * XML decl, the XML standard specifies how a parser can * auto-detect. If there is no encodingStr * this is what will be used to try to decode the file. */ virtual void XMLDecl ( const XMLCh* const versionStr , const XMLCh* const encodingStr , const XMLCh* const standaloneStr , const XMLCh* const autoEncodingStr ) = 0; /** Receive notification of the name and namespace of the type that validated * the element corresponding to the most recent endElement event. * This event will be fired immediately after the * endElement() event that signifies the end of the element * to which it applies; no other events will intervene. * This method is EXPERIMENTAL and may change, disappear * or become pure virtual at any time. * * This corresponds to a part of the information required by DOM Core * level 3's TypeInfo interface. * * @param typeName local name of the type that actually validated * the content of the element corresponding to the * most recent endElement() callback * @param typeURI namespace of the type that actually validated * the content of the element corresponding to the * most recent endElement() callback * @deprecated */ virtual void elementTypeInfo ( const XMLCh* const /* typeName */ , const XMLCh* const /* typeURI */ ) { /* non pure virtual to permit backward compatibility of implementations. */ }; //@} protected : // ----------------------------------------------------------------------- // Hidden Constructors // ----------------------------------------------------------------------- XMLDocumentHandler() { } private: // ----------------------------------------------------------------------- // Unimplemented constructors and operators // ----------------------------------------------------------------------- XMLDocumentHandler(const XMLDocumentHandler&); XMLDocumentHandler& operator=(const XMLDocumentHandler&); }; XERCES_CPP_NAMESPACE_END #endif