The default value is 'false'.
*
* @param newState If true, the parser will exit if "setExitOnFirstFatalError"
* is set to true.
*
* @see #getValidationConstraintFatal
* @see #setExitOnFirstFatalError
*/
void setValidationConstraintFatal(const bool newState);
/** Set the 'include entity references' flag
*
* This method allows the user to specify whether the parser should
* create entity reference nodes in the DOM tree being produced.
*
* When the 'create' flag is true, the parser will create EntityReference
* nodes in the DOM tree. The EntityReference nodes and their child nodes
* will be read-only.
*
* When the 'create' flag is false, no EntityReference nodes will be
* created.
*
* The replacement text of the entity is included in either case, either
* as a child of the EntityReference node or in place at the location of
* the reference.
*
* The default value is 'true'.
*
* @param create The new state of the create entity reference nodes
* flag.
* @see #getCreateEntityReferenceNodes
*/
void setCreateEntityReferenceNodes(const bool create);
/** Set the 'include ignorable whitespace' flag
*
* This method allows the user to specify whether a validating parser
* should include ignorable whitespaces as text nodes. It has no effect
* on non-validating parsers which always include non-markup text.
*
* When set to true (also the default), ignorable whitespaces will be
* added to the DOM tree as text nodes. The method
* DOMText::isIgnorableWhitespace() will return true for those text
* nodes only.
*
* When set to false, all ignorable whitespace will be discarded and
* no text node is added to the DOM tree. Note: applications intended
* to process the "xml:space" attribute should not set this flag to false.
* This flag also overrides any schema datatype whitespace facets;
* that is, all ignorable whitespace will be discarded even though
* 'preserve' is set in schema datatype whitespace facets.
*
* @param include The new state of the include ignorable whitespace
* flag.
*
* @see #getIncludeIgnorableWhitespace
*/
void setIncludeIgnorableWhitespace(const bool include);
/** Set the validation scheme
*
* This method allows users to set the validation scheme to be used
* by this parser. The value is one of the ValSchemes enumerated values
* defined by this class:
*
*   - Val_Never  - turn off validation
*   - Val_Always - turn on validation
*   - Val_Auto   - turn on validation if any internal/external
*                  DTD subset has been seen
*
* The parser's default state is: Val_Auto.
*
* @param newScheme The new validation scheme to use.
*
* @see #getValidationScheme
*/
void setValidationScheme(const ValSchemes newScheme);
/** Set the 'do schema' flag
*
* This method allows users to enable or disable the parser's
* schema processing. When set to false, parser will not process
* any schema found.
*
* The parser's default state is: false.
*
* Note: If set to true, namespace processing must also be turned on.
*
* @param newState The value specifying whether schema support should
* be enforced or not.
*
* @see #getDoSchema
*/
void setDoSchema(const bool newState);
/** Set the 'full schema constraint checking' flag
*
* This method allows the user to turn full Schema constraint checking on/off.
* Only takes effect if Schema validation is enabled.
* If turned off, partial constraint checking is done.
*
* Full schema constraint checking includes those checks that may
* be time-consuming or memory intensive. Currently, particle unique
* attribution constraint checking and particle derivation restriction
* checking are controlled by this option.
*
* The parser's default state is: false.
*
* @param schemaFullChecking True to turn on full schema constraint checking.
*
* @see #getValidationSchemaFullChecking
*/
void setValidationSchemaFullChecking(const bool schemaFullChecking);
/** Set the external schema location list
*
* This method allows the user to specify a list of schemas to use.
* If the targetNamespace of a schema specified using this method matches
* the targetNamespace of a schema occurring in the instance document in
* the schemaLocation attribute, or if the targetNamespace matches the
* namespace attribute of the "import" element, the schema specified by the
* user using this method will be used (i.e., the schemaLocation attribute
* in the instance document or on the "import" element will be effectively
* ignored).
*
* If this method is called more than once, only the last one takes effect.
*
* The syntax is the same as for schemaLocation attributes in instance
* documents: e.g., "http://www.example.com file_name.xsd".
* The user can specify more than one XML Schema in the list.
*
* @param schemaLocation the list of schemas to use
*
* @see #getExternalSchemaLocation
*/
void setExternalSchemaLocation(const XMLCh* const schemaLocation);
/**
* This method is the same as setExternalSchemaLocation(const XMLCh* const).
* It takes a native char string as parameter.
*
* @param schemaLocation the list of schemas to use
*
* @see #setExternalSchemaLocation(const XMLCh* const)
*/
void setExternalSchemaLocation(const char* const schemaLocation);
/** Set the external no-namespace schema location
*
* This method allows the user to specify the no target namespace XML
* Schema Location externally. If specified, the instance document's
* noNamespaceSchemaLocation attribute will be effectively ignored.
*
* If this method is called more than once, only the last one takes effect.
*
* The syntax is the same as for the noNamespaceSchemaLocation attribute
* that may occur in an instance document: e.g., "file_name.xsd".
*
* @param noNamespaceSchemaLocation the XML Schema Location with no target namespace
*
* @see #getExternalNoNamespaceSchemaLocation
*/
void setExternalNoNamespaceSchemaLocation(const XMLCh* const noNamespaceSchemaLocation);
/**
* This method is the same as
* setExternalNoNamespaceSchemaLocation(const XMLCh* const).
* It takes a native char string as parameter.
*
* @param noNamespaceSchemaLocation the XML Schema Location with no target namespace
*
* @see #setExternalNoNamespaceSchemaLocation(const XMLCh* const)
*/
void setExternalNoNamespaceSchemaLocation(const char* const noNamespaceSchemaLocation);
/** Set the security manager
*
* This allows an application to set a SecurityManager on
* the parser; this object stores information that various
* components use to limit their consumption of system
* resources while processing documents.
*
* If this method is called more than once, only the last one takes effect.
* It may not be reset during a parse.
*
* @param securityManager the SecurityManager instance to
* be used by this parser
*
* @see #getSecurityManager
*/
void setSecurityManager(SecurityManager* const securityManager);
/** Set the 'Loading External DTD' flag
*
* This method allows users to enable or disable the loading of external DTD.
* When set to false, the parser will ignore any external DTD completely
* if the validationScheme is set to Val_Never.
*
* The parser's default state is: true.
*
* This flag is ignored if the validationScheme is set to Val_Always or Val_Auto.
*
* @param newState The value specifying whether external DTD should
* be loaded or not.
*
* @see #getLoadExternalDTD
* @see #setValidationScheme
*/
void setLoadExternalDTD(const bool newState);
/** Set the 'create comment nodes' flag
*
* This method allows the user to specify whether the parser should
* create comment nodes in the DOM tree being produced.
*
* The default value is 'true'.
*
* @param create The new state of the create comment nodes
* flag.
* @see #getCreateCommentNodes
*/
void setCreateCommentNodes(const bool create);
/** Enable/disable source offset calculation
*
* This method allows users to enable or disable source offset
* calculation. Disabling the calculation will improve performance.
*
* The parser's default state is: false.
*
* @param newState The value specifying whether we should enable or
* disable source offset calculation
*
* @see #getCalculateSrcOfs
*/
void setCalculateSrcOfs(const bool newState);
/** Force standard URI conformance
*
* This method allows users to tell the parser to force standard URI
* conformance.
*
* The parser's default state is: false.
*
* @param newState The value specifying whether the parser should reject
* malformed URIs.
*
* @see #getStandardUriConformant
*/
void setStandardUriConformant(const bool newState);
/** Set the scanner to use when scanning the XML document
*
* This method allows users to choose, by name, the scanner to use
* when scanning a given XML document.
*
* @param scannerName The name of the desired scanner
*/
void useScanner(const XMLCh* const scannerName);
/** Set the implementation to use when creating the document
*
* This method allows users to set the implementation to use
* to create the document when parsing.
*
* @param implementationFeatures The names of the desired features the
* implementation should have.
*/
void useImplementation(const XMLCh* const implementationFeatures);
/** Set the PSVI handler
*
* This method installs the user-specified PSVI handler on
* the parser.
*
* @param handler A pointer to the PSVI handler to be called
* when the parser comes across 'PSVI' events
* as per the schema specification.
*/
virtual void setPSVIHandler(PSVIHandler* const handler);
//@}
// -----------------------------------------------------------------------
// Parsing methods
// -----------------------------------------------------------------------
/** @name Parsing methods */
//@{
/** Parse via an input source object
*
* This method invokes the parsing process on the XML file specified
* by the InputSource parameter. This API is borrowed from the
* SAX Parser interface.
*
* @param source A const reference to the InputSource object which
* points to the XML file to be parsed.
*
* @exception SAXException Any SAX exception, possibly
* wrapping another exception.
* @exception XMLException An exception from the parser or client
* handler code.
* @exception DOMException A DOM exception as per DOM spec.
* @see InputSource#InputSource
*/
void parse(const InputSource& source);
/** Parse via a file path or URL
*
* This method invokes the parsing process on the XML file specified by
* the Unicode string parameter 'systemId'. This API is borrowed
* from the SAX Parser interface.
*
* @param systemId A const XMLCh pointer to the Unicode string which
* contains the path to the XML file to be parsed.
*
* @exception SAXException Any SAX exception, possibly
* wrapping another exception.
* @exception XMLException An exception from the parser or client
* handler code.
* @exception DOMException A DOM exception as per DOM spec.
* @see #parse(InputSource,...)
*/
void parse(const XMLCh* const systemId);
/** Parse via a file path or URL (in the local code page)
*
* This method invokes the parsing process on the XML file specified by
* the native char* string parameter 'systemId'.
*
* @param systemId A const char pointer to a native string which
* contains the path to the XML file to be parsed.
*
* @exception SAXException Any SAX exception, possibly
* wrapping another exception.
* @exception XMLException An exception from the parser or client
* handler code.
* @exception DOMException A DOM exception as per DOM spec.
* @see #parse(InputSource,...)
* @see #parse(XMLCh*,...)
*/
void parse(const char* const systemId);
/** Begin a progressive parse operation
*
* This method is used to start a progressive parse on an XML file.
* To continue parsing, subsequent calls must be to the parseNext
* method.
*
* It scans through the prolog and returns a token to be used on
* subsequent parseNext() calls. If the return value is true, then the
* token is legal and ready for further use. If it returns false, then
* the scan of the prolog failed and the token is not going to work on
* subsequent parseNext() calls.
*
* @param systemId A pointer to a Unicode string representing the path
* to the XML file to be parsed.
* @param toFill A token maintaining state information to maintain
* internal consistency between invocations of 'parseNext'
* calls.
*
* @return 'true', if successful in parsing the prolog. It indicates the
* user can go ahead with parsing the rest of the file. It
* returns 'false' to indicate that the parser could not parse
* the prolog.
*
* @see #parseNext
* @see #parseFirst(char*,...)
* @see #parseFirst(InputSource&,...)
*/
bool parseFirst
(
const XMLCh* const systemId
, XMLPScanToken& toFill
);
/** Begin a progressive parse operation (native string path)
*
* This method is used to start a progressive parse on an XML file.
* To continue parsing, subsequent calls must be to the parseNext
* method.
*
* It scans through the prolog and returns a token to be used on
* subsequent parseNext() calls. If the return value is true, then the
* token is legal and ready for further use. If it returns false, then
* the scan of the prolog failed and the token is not going to work on
* subsequent parseNext() calls.
*
* @param systemId A pointer to a regular native string representing
* the path to the XML file to be parsed.
* @param toFill A token maintaining state information to maintain
* internal consistency between invocations of 'parseNext'
* calls.
*
* @return 'true', if successful in parsing the prolog. It indicates the
* user can go ahead with parsing the rest of the file. It
* returns 'false' to indicate that the parser could not parse
* the prolog.
*
* @see #parseNext
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(InputSource&,...)
*/
bool parseFirst
(
const char* const systemId
, XMLPScanToken& toFill
);
/** Begin a progressive parse operation (input source)
*
* This method is used to start a progressive parse on an XML file.
* To continue parsing, subsequent calls must be to the parseNext
* method.
*
* It scans through the prolog and returns a token to be used on
* subsequent parseNext() calls. If the return value is true, then the
* token is legal and ready for further use. If it returns false, then
* the scan of the prolog failed and the token is not going to work on
* subsequent parseNext() calls.
*
* @param source A const reference to the InputSource object which
* points to the XML file to be parsed.
* @param toFill A token maintaining state information to maintain
* internal consistency between invocations of 'parseNext'
* calls.
*
* @return 'true', if successful in parsing the prolog. It indicates the
* user can go ahead with parsing the rest of the file. It
* returns 'false' to indicate that the parser could not parse
* the prolog.
*
* @see #parseNext
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(char*,...)
*/
bool parseFirst
(
const InputSource& source
, XMLPScanToken& toFill
);
/** Continue a progressive parse operation
*
* This method is used to continue with progressive parsing of
* XML files started by a call to the 'parseFirst' method.
*
* It parses the XML file and stops as soon as it comes across
* an XML token (as defined in the XML specification).
*
* @param token A token maintaining state information to maintain
* internal consistency between invocations of 'parseNext'
* calls.
*
* @return 'true', if successful in parsing the next XML token.
* It indicates the user can go ahead with parsing the rest
* of the file. It returns 'false' to indicate that the parser
* could not find the next token as per the XML specification
* production rule.
*
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(char*,...)
* @see #parseFirst(InputSource&,...)
*/
bool parseNext(XMLPScanToken& token);
/** Reset the parser after a progressive parse
*
* If a progressive parse loop exits before the end of the document
* is reached, the parser has no way of knowing this. So it will leave
* open any files or sockets or memory buffers that were in use at
* the time that the parse loop exited.
*
* The next parse operation will cause these open files and such to
* be closed, but the next parse operation might occur at some unknown
* future point. To avoid this problem, you should reset the parser if
* you exit the loop early.
*
* If you exited because of an error, then this cleanup will be done
* for you. It is only when you exit the file prematurely of your own
* accord (most likely because you have found what you wanted in the
* file) that you need to reset the parser yourself.
*
* @param token A token maintaining state information to maintain
* internal consistency between invocations of 'parseNext'
* calls.
*
* @see #parseFirst(XMLCh*,...)
* @see #parseFirst(char*,...)
* @see #parseFirst(InputSource&,...)
*/
void parseReset(XMLPScanToken& token);
//@}
// -----------------------------------------------------------------------
// Implementation of the XMLDocumentHandler interface.
// -----------------------------------------------------------------------
/** @name Implementation of the XMLDocumentHandler interface. */
//@{
/** Handle document character events
*
* This method is used to report all the characters scanned by the
* parser. This DOM implementation stores this data in the appropriate
* DOM node, creating one if necessary.
*
* @param chars A const pointer to a Unicode string representing the
* character data.
* @param length The length of the Unicode string passed in 'chars'.
* @param cdataSection A flag indicating if the characters represent
* content from a CDATA section.
*/
virtual void docCharacters
(
const XMLCh* const chars
, const unsigned int length
, const bool cdataSection
);
/** Handle a document comment event
*
* This method is used to report any comments scanned by the parser.
* A new comment node is created which stores this data.
*
* @param comment A const pointer to a null-terminated Unicode
* string representing the comment text.
*/
virtual void docComment
(
const XMLCh* const comment
);
/** Handle a document PI (processing instruction) event
*
* This method is used to report any PI scanned by the parser. A new
* PI node is created and appended as a child of the current node in
* the tree.
*
* @param target A const pointer to a Unicode string representing the
* target of the PI declaration.
* @param data A const pointer to a Unicode string representing the
* data of the PI declaration. See the PI production rule
* in the XML specification for details.
*/
virtual void docPI
(
const XMLCh* const target
, const XMLCh* const data
);
/** Handle the end of document event
*
* This method is used to indicate that the end of the current document
* has been reached.
*/
virtual void endDocument();
/** Handle an end of element event
*
* This method is used to indicate the end tag of an element. The
* DOM parser pops the current element off the top of the element
* stack, and makes it the new current element.
*
* @param elemDecl A const reference to the object containing element
* declaration information.
* @param urlId An id referring to the namespace prefix, if
* namespaces setting is switched on.
* @param isRoot A flag indicating whether this element was the
* root element.
* @param elemPrefix A const pointer to a Unicode string containing
* the namespace prefix for this element. Applicable
* only when namespace processing is enabled.
*/
virtual void endElement
(
const XMLElementDecl& elemDecl
, const unsigned int urlId
, const bool isRoot
, const XMLCh* const elemPrefix
);
/** Handle an end of entity reference event
*
* This method is used to indicate that the end of an entity reference
* was just scanned.
*
* @param entDecl A const reference to the object containing the
* entity declaration information.
*/
virtual void endEntityReference
(
const XMLEntityDecl& entDecl
);
/** Handle an ignorable whitespace event
*
* This method is used to report all the whitespace characters which
* are determined to be 'ignorable'. This distinction between characters
* is only made if validation is enabled.
*
* Any whitespace before content is ignored. If the current node is
* already of type DOMNode::TEXT_NODE, then these whitespaces are
* appended, otherwise a new Text node is created which stores this
* data. Essentially all contiguous ignorable characters are collected
* in one node.
*
* @param chars A const pointer to a Unicode string representing the
* ignorable whitespace character data.
* @param length The length of the Unicode string 'chars'.
* @param cdataSection A flag indicating if the characters represent
* content from a CDATA section.
*/
virtual void ignorableWhitespace
(
const XMLCh* const chars
, const unsigned int length
, const bool cdataSection
);
/** Handle a document reset event
*
* This method allows the user-installed Document Handler to 'reset'
* itself, freeing all the memory resources. The scanner calls this
* method before starting a new parse.
*/
virtual void resetDocument();
/** Handle a start document event
*
* This method is used to report the start of the parsing process
* for a document.
*/
virtual void startDocument();
/** Handle a start element event
*
* This method is used to report the start of an element. It is
* called at the end of the element, by which time all attributes
* specified are also parsed. A new DOM Element node is created
* along with as many attribute nodes as required. This new element
* is added appended as a child of the current node in the tree, and
* then replaces it as the current node (if the isEmpty flag is false.)
*
* @param elemDecl A const reference to the object containing element
* declaration information.
* @param urlId An id referring to the namespace prefix, if
* namespaces setting is switched on.
* @param elemPrefix A const pointer to a Unicode string containing
* the namespace prefix for this element. Applicable
* only when namespace processing is enabled.
* @param attrList A const reference to the object containing the
* list of attributes just scanned for this element.
* @param attrCount A count of number of attributes in the list
* specified by the parameter 'attrList'.
* @param isEmpty A flag indicating whether this is an empty element
* or not. If empty, then no endElement() call will
* be made.
* @param isRoot A flag indicating whether this element was the
* root element.
* @see DocumentHandler#startElement
*/
virtual void startElement
(
const XMLElementDecl& elemDecl
, const unsigned int urlId
, const XMLCh* const elemPrefix
, const RefVectorOf By default, the parser does not to any validation. The default
* value is false. The default value is 'false'.
*
* @param expand The new state of the expand entity reference
* flag.
* @see #setCreateEntityReferenceNodes
*/
void setExpandEntityReferences(const bool expand);
//@}
protected :
// -----------------------------------------------------------------------
// Protected Constructor Methods
// -----------------------------------------------------------------------
/** @name Constructors */
//@{
/** Construct an AbstractDOMParser, with an optional validator
*
* Constructor with an instance of validator class to use for
* validation. If you don't provide a validator, a default one will
* be created for you in the scanner.
*
* The parameters below are listed in signature order.
*
* @param valToAdopt Pointer to the validator instance to use. The
* parser is responsible for freeing the memory.
*
* @param manager Pointer to the memory manager to be used to
* allocate objects.
*
* @param gramPool Pointer to the grammar pool instance from
* external application (through derivatives).
* The parser does NOT own it.
*/
AbstractDOMParser
(
XMLValidator* const valToAdopt = 0
, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
, XMLGrammarPool* const gramPool = 0
);
//@}
// -----------------------------------------------------------------------
// Protected getter methods
// -----------------------------------------------------------------------
/** @name Protected getter methods */
//@{
/** Get the current DOM node
*
* This provides derived classes with access to the current node, i.e.
* the node to which new nodes are being added.
*
* @return A pointer to the current DOM node.
*/
DOMNode* getCurrentNode();
/** Get the XML scanner
*
* This provides derived classes with access to the XML scanner.
*
* @return A pointer to the XML scanner.
*/
XMLScanner* getScanner() const;
/** Get the grammar resolver
*
* This provides derived classes with access to the grammar resolver.
*
* @return A pointer to the grammar resolver.
*/
GrammarResolver* getGrammarResolver() const;
/** Get the parse in progress flag
*
* This provides derived classes with access to the parse in progress
* flag.
*
* @return The current value of the parse in progress flag.
*/
bool getParseInProgress() const;
/** Get the memory manager
*
* This provides derived classes with access to a memory manager.
* NOTE(review): presumably this is the manager supplied to the
* constructor; confirm against the implementation.
*/
MemoryManager* getMemoryManager() const;
//@}
// -----------------------------------------------------------------------
// Protected setter methods
// -----------------------------------------------------------------------
/** @name Protected setter methods */
//@{
/** Set the current DOM node
*
* This method sets the current node maintained inside the parser to
* the one specified.
*
* @param toSet The DOM node which will become the current node.
*/
void setCurrentNode(DOMNode* toSet);
/** Set the document node
*
* This method sets the DOM Document node to the one specified.
*
* @param toSet The new DOM Document node for this XML document.
*/
void setDocument(DOMDocument* toSet);
/** Set the parse in progress flag
*
* This method sets the parse in progress flag to true or false.
*
* @param toSet The new value of the flag.
*/
void setParseInProgress(const bool toSet);
//@}
// -----------------------------------------------------------------------
// Protected Helper methods
// -----------------------------------------------------------------------
/** @name Protected helper methods */
//@{
/** Create an element node from a namespace URI and qualified name
*
* NOTE(review): undocumented in this header. Being virtual, it is
* presumably a hook that lets derived parsers supply their own element
* creation; confirm against the implementation.
*
* @param fNamespaceURI The namespace URI for the element.
* @param qualifiedName The qualified name for the element.
*/
virtual DOMElement* createElementNSNode(const XMLCh *fNamespaceURI,
const XMLCh *qualifiedName);
// NOTE(review): undocumented in this header; which pool is reset cannot
// be determined from the declaration alone - confirm against the
// implementation.
void resetPool();
/**
* Returns true if the user has adopted the document, i.e. ownership
* of the DOMDocument has been transferred to the application.
*/
bool isDocumentAdopted() const;
//@}
private :
// -----------------------------------------------------------------------
// Initialize/Cleanup methods
// -----------------------------------------------------------------------
// Private set-up and tear-down helpers.
// NOTE(review): presumably invoked from the constructor and destructor
// respectively - confirm against the implementation.
void initialize();
void cleanUp();
// -----------------------------------------------------------------------
// Unimplemented constructors and operators
// -----------------------------------------------------------------------
// Copy construction and copy assignment are declared in the private
// section and are listed as unimplemented.
// NOTE(review): presumably to make the parser non-copyable - confirm.
AbstractDOMParser(const AbstractDOMParser&);
AbstractDOMParser& operator=(const AbstractDOMParser&);
protected:
// -----------------------------------------------------------------------
// Protected data members
//
// fCurrentNode
// fCurrentParent
// Used to track the current node during nested element events. Since
// the tree must be built from a set of disjoint callbacks, we need
// these to keep up with where we currently are.
//
// fCurrentEntity
// Used to track the current entity decl. If a text decl is seen later on,
// it is used to update the encoding and version information.
//
// fDocument
// The root document object, filled with the document contents.
//
// fCreateEntityReferenceNodes
// Indicates whether entity reference nodes should be created.
//
// fIncludeIgnorableWhitespace
// Indicates whether ignorable whitespace should be added to
// the DOM tree for validating parsers.
//
// fScanner
// The scanner used for this parser. This is created during the
// constructor.
//
// fImplementationFeatures
// The implementation features that we use to get an implementation
// for use in creating the DOMDocument used during parse. If this is
// null then the default DOMImplementation is used
//
// fNodeStack
// Used to track previous parent nodes during nested element events.
//
// fParseInProgress
// Used to prevent multiple entrance to the parser while it's doing
// a parse.
//
// fWithinElement
// A flag to indicate that the parser is within at least one level
// of element processing.
//
// fDocumentType
// Used to store and update the documentType variable information
// in fDocument
//
// fDocumentVector
// Store all the previous fDocument(s) (thus not the current fDocument)
// created in this parser. It is destroyed when the parser is destructed.
//
// fCreateCommentNodes
// Indicates whether comment nodes should be created.
//
// fDocumentAdoptedByUser
// The DOMDocument ownership has been transferred to application
// If set to true, the parser does not own the document anymore
// and thus will not release its memory.
//
// fInternalSubset
// Buffer for storing the internal subset information.
// Once complete (after DOCTYPE is finished scanning), send
// it to DocumentType Node
//
// fGrammarPool
// The grammar pool passed from external application (through derivatives).
// which could be 0, not owned.
//
// -----------------------------------------------------------------------
bool fCreateEntityReferenceNodes; // create entity reference nodes?
bool fIncludeIgnorableWhitespace; // add ignorable whitespace to the DOM tree (validating parsers)?
bool fWithinElement; // inside at least one level of element processing
bool fParseInProgress; // guards against re-entering the parser during a parse
bool fCreateCommentNodes; // create comment nodes?
bool fDocumentAdoptedByUser; // true: application owns the document, parser will not release it
XMLScanner* fScanner; // scanner used by this parser
XMLCh* fImplementationFeatures; // features used to pick a DOMImplementation; null means the default
DOMNode* fCurrentParent; // tracks position during nested element events (with fCurrentNode)
DOMNode* fCurrentNode; // tracks position during nested element events (with fCurrentParent)
DOMEntity* fCurrentEntity; // current entity decl; updated if a text decl is seen later
DOMDocumentImpl* fDocument; // root document object, filled with the document contents
ValueStackOf