source: NonGTP/Xerces/xercesc/internal/IGXMLScanner.hpp @ 188

Revision 188, 16.1 KB checked in by mattausch, 19 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 2002 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Log: IGXMLScanner.hpp,v $
59 * Revision 1.18  2004/01/29 11:46:30  cargilld
60 * Code cleanup changes to get rid of various compiler diagnostic messages.
61 *
62 * Revision 1.17  2003/11/28 19:54:31  knoaman
63 * PSVIElement update
64 *
65 * Revision 1.16  2003/11/28 05:13:29  neilg
66 * Fix state-ful duplicate attribute detection when the integrated
67 * scanner is in use and namespaces are off.  Also, implement
68 * change to PSVIHandler interface to remove prefix passing.
69 *
70 * Revision 1.15  2003/11/27 22:52:37  knoaman
71 * PSVIElement implementation
72 *
73 * Revision 1.14  2003/11/27 06:10:31  neilg
74 * PSVIAttribute implementation
75 *
76 * Revision 1.13  2003/11/26 16:20:00  knoaman
77 * Store XSModel.
78 *
79 * Revision 1.12  2003/11/24 05:09:38  neilg
80 * implement new, stateless, method for detecting duplicate attributes
81 *
82 * Revision 1.11  2003/10/22 20:22:30  knoaman
83 * Prepare for annotation support.
84 *
85 * Revision 1.10  2003/09/22 19:51:41  neilg
86 * scanners should maintain their own pools of undeclared elements, rather than requiring grammars to do this.  This makes grammar objects stateless with regard to validation.
87 *
88 * Revision 1.9  2003/08/14 02:56:41  knoaman
89 * Code refactoring to improve performance of validation.
90 *
91 * Revision 1.8  2003/07/10 19:47:23  peiyongz
92 * Stateless Grammar: Initialize scanner with grammarResolver,
93 *                                creating grammar through grammarPool
94 *
95 * Revision 1.7  2003/05/22 02:10:51  knoaman
96 * Default the memory manager.
97 *
98 * Revision 1.6  2003/05/15 18:26:29  knoaman
99 * Partial implementation of the configurable memory manager.
100 *
101 * Revision 1.5  2003/03/07 18:08:58  tng
102 * Return a reference instead of void for operator=
103 *
104 * Revision 1.4  2003/01/29 19:59:35  gareth
105 * we now detect when elements and attributes are validated/ the result of the validation and store that information.
106 *
107 * Revision 1.3  2003/01/15 15:49:49  knoaman
108 * Change constant declaration name to match its value.
109 *
110 * Revision 1.2  2003/01/13 16:30:18  knoaman
111 * [Bug 14469] Validator doesn't enforce xsd:key.
112 *
113 * Revision 1.1  2002/12/04 02:05:25  knoaman
114 * Initial checkin.
115 *
116 */
117
118
119#if !defined(IGXMLSCANNER_HPP)
120#define IGXMLSCANNER_HPP
121
122#include <xercesc/internal/XMLScanner.hpp>
123#include <xercesc/util/KVStringPair.hpp>
124#include <xercesc/util/NameIdPool.hpp>
125#include <xercesc/util/RefHash3KeysIdPool.hpp>
126#include <xercesc/validators/common/Grammar.hpp>
127#include <xercesc/validators/schema/SchemaElementDecl.hpp>
128
129XERCES_CPP_NAMESPACE_BEGIN
130
// Forward declarations. Within this header these types appear only behind
// pointers (or as the template argument of a pointed-to container), so
// their full definitions are not required here.
class ContentLeafNameTypeVector;
class DTDElementDecl;
class DTDGrammar;
class DTDValidator;
class FieldActivator;
class IdentityConstraint;
class PSVIAttributeList;
class PSVIElement;
class SchemaAttDef;
class SchemaValidator;
class ValueStoreCache;
class XMLContentModel;
class XPathMatcherStack;
class XSModel;
145
146//  This is an integrated scanner class, which does DTD/XML Schema grammar
147//  processing.
148class XMLPARSER_EXPORT IGXMLScanner : public XMLScanner
149{
150public :
151    // -----------------------------------------------------------------------
152    //  Constructors and Destructor
153    // -----------------------------------------------------------------------
154    IGXMLScanner
155    (
156        XMLValidator* const valToAdopt
157        , GrammarResolver* const grammarResolver
158        , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager
159    );
160    IGXMLScanner
161    (
162          XMLDocumentHandler* const docHandler
163        , DocTypeHandler* const     docTypeHandler
164        , XMLEntityHandler* const   entityHandler
165        , XMLErrorReporter* const   errReporter
166        , XMLValidator* const       valToAdopt
167        , GrammarResolver* const    grammarResolver
168        , MemoryManager* const      manager = XMLPlatformUtils::fgMemoryManager
169    );
170    virtual ~IGXMLScanner();
171
172    // -----------------------------------------------------------------------
173    //  XMLScanner public virtual methods
174    // -----------------------------------------------------------------------
175    virtual const XMLCh* getName() const;
176    virtual NameIdPool<DTDEntityDecl>* getEntityDeclPool();
177    virtual const NameIdPool<DTDEntityDecl>* getEntityDeclPool() const;
178    virtual unsigned int resolveQName
179    (
180        const   XMLCh* const        qName
181        ,       XMLBuffer&          prefixBufToFill
182        , const short               mode
183        ,       int&                prefixColonPos
184    );
185    virtual void scanDocument
186    (
187        const   InputSource&    src
188    );
189    virtual bool scanNext(XMLPScanToken& toFill);
190    virtual Grammar* loadGrammar
191    (
192        const   InputSource&    src
193        , const short           grammarType
194        , const bool            toCache = false
195    );
196
197private :
198    // -----------------------------------------------------------------------
199    //  Unimplemented constructors and operators
200    // -----------------------------------------------------------------------
201    IGXMLScanner();
202    IGXMLScanner(const IGXMLScanner&);
203    IGXMLScanner& operator=(const IGXMLScanner&);
204
205    // -----------------------------------------------------------------------
206    //  XMLScanner virtual methods
207    // -----------------------------------------------------------------------
208    virtual void scanCDSection();
209    virtual void scanCharData(XMLBuffer& toToUse);
210    virtual EntityExpRes scanEntityRef
211    (
212        const   bool    inAttVal
213        ,       XMLCh&  firstCh
214        ,       XMLCh&  secondCh
215        ,       bool&   escaped
216    );
217    virtual void scanDocTypeDecl();
218    virtual void scanReset(const InputSource& src);
219    virtual void sendCharData(XMLBuffer& toSend);
220
221    // -----------------------------------------------------------------------
222    //  Private helper methods
223    // -----------------------------------------------------------------------
224    void commonInit();
225    void cleanUp();
226    InputSource* resolveSystemId(const XMLCh* const sysId); // return owned by caller
227
228    // Spaces are not allowed in URI, so %20 is used instead.
229    // Convert %20 to spaces before resolving the URI
230    void normalizeURI(const XMLCh* const systemURI, XMLBuffer& normalizedURI);
231
232    unsigned int buildAttList
233    (
234        const   RefVectorOf<KVStringPair>&  providedAttrs
235        , const unsigned int                attCount
236        ,       XMLElementDecl*             elemDecl
237        ,       RefVectorOf<XMLAttr>&       toFill
238    );
239    bool normalizeAttValue
240    (
241        const   XMLAttDef* const    attDef
242        , const XMLCh* const       name
243        , const XMLCh* const        value
244        ,       XMLBuffer&          toFill
245    );
246    bool normalizeAttRawValue
247    (
248        const   XMLCh* const        attrName
249        , const XMLCh* const        value
250        ,       XMLBuffer&          toFill
251    );
252    unsigned int resolvePrefix
253    (
254        const   XMLCh* const        prefix
255        , const ElemStack::MapModes mode
256    );
257    unsigned int resolvePrefix
258    (
259        const   XMLCh* const        prefix
260        ,       XMLBuffer&          uriBufToFill
261        , const ElemStack::MapModes mode
262    );
263    void updateNSMap
264    (
265        const   XMLCh* const    attrName
266        , const XMLCh* const    attrValue
267    );
268    void scanRawAttrListforNameSpaces(int attCount);
269    void parseSchemaLocation(const XMLCh* const schemaLocationStr);
270    void resolveSchemaGrammar(const XMLCh* const loc, const XMLCh* const uri);
271    bool switchGrammar(const XMLCh* const newGrammarNameSpace);
272    bool laxElementValidation(QName* element, ContentLeafNameTypeVector* cv,
273                              const XMLContentModel* const cm,
274                              const unsigned int parentElemDepth);
275    bool anyAttributeValidation(SchemaAttDef* attWildCard,
276                                unsigned int uriId,
277                                bool& skipThisOne,
278                                bool& laxThisOne);
279    void resizeElemState();
280    void processSchemaLocation(XMLCh* const schemaLoc);
281
282    // -----------------------------------------------------------------------
283    //  Private scanning methods
284    // -----------------------------------------------------------------------
285    bool basicAttrValueScan
286    (
287        const   XMLCh* const    attrName
288        ,       XMLBuffer&      toFill
289    );
290    unsigned int rawAttrScan
291    (
292        const   XMLCh* const                elemName
293        ,       RefVectorOf<KVStringPair>&  toFill
294        ,       bool&                       isEmpty
295    );
296    bool scanAttValue
297    (
298        const   XMLAttDef* const    attDef
299        , const   XMLCh* const      attrName
300        ,       XMLBuffer&          toFill
301    );
302    bool scanContent();
303    void scanEndTag(bool& gotData);
304    bool scanStartTag(bool& gotData);
305    bool scanStartTagNS(bool& gotData);
306
307    // -----------------------------------------------------------------------
308    //  IdentityConstraints Activation methods
309    // -----------------------------------------------------------------------
310    void activateSelectorFor(IdentityConstraint* const ic, const int initialDepth);
311
312    // -----------------------------------------------------------------------
313    //  Grammar preparsing methods
314    // -----------------------------------------------------------------------
315    Grammar* loadXMLSchemaGrammar(const InputSource& src, const bool toCache = false);
316    Grammar* loadDTDGrammar(const InputSource& src, const bool toCache = false);
317
318    // -----------------------------------------------------------------------
319    //  PSVI handling methods
320    // -----------------------------------------------------------------------
321    void endElementPSVI(SchemaElementDecl* const elemDecl,
322                        DatatypeValidator* const memberDV);
323    void resetPSVIElemContext();
324
325    // -----------------------------------------------------------------------
326    //  Data members
327    //
328    //  fRawAttrList
329    //      During the initial scan of the attributes we can only do a raw
330    //      scan for key/value pairs. So this vector is used to store them
331    //      until they can be processed (and put into fAttrList.)
332    //
333    //  fDTDValidator
334    //      The DTD validator instance.
335    //
336    //  fSchemaValidator
337    //      The Schema validator instance.
338    //
339    //  fSeeXsi
340    //      This flag indicates a schema has been seen.
341    //
342    //  fElemState
343    //  fElemStateSize
344    //      Stores an element next state from DFA content model - used for
345    //      wildcard validation
346    //
347    //  fMatcherStack
348    //      Stack of active XPath matchers for identity constraints. All
349    //      active XPath matchers are notified of startElement, characters
350    //      and endElement callbacks in order to perform their matches.
351    //
352    //  fValueStoreCache
353    //      Cache of value stores for identity constraint fields.
354    //
355    //  fFieldActivator
356    //      Activates fields within a certain scope when a selector matches
357    //      its xpath.
358    // fDTDElemNonDeclPool
359    //      registry of "faulted-in" DTD element decls
360    // fSchemaElemNonDeclPool
361    //      registry for elements without decls in the grammar
362    // fElemCount
363    //      count of the number of start tags seen so far (starts at 1).
364    //      Used for duplicate attribute detection/processing of required/defaulted attributes
365    // fAttDefRegistry
366    //      mapping from XMLAttDef instances to the count of the last
367    //      start tag where they were utilized.
368    // fUndeclaredAttrRegistry
369    //      mapping of attr QNames to the count of the last start tag in which they occurred
370    // fUndeclaredAttrRegistryNS
371    //      mapping of namespaceId/localName pairs to the count of the last
372    //      start tag in which they occurred.
373    //  fPSVIAttrList
374    //      PSVI attribute list implementation that needs to be
375    //      filled when a PSVIHandler is registered
376    //
377    // -----------------------------------------------------------------------
378    bool                        fSeeXsi;
379    Grammar::GrammarType        fGrammarType;
380    unsigned int                fElemStateSize;
381    unsigned int*               fElemState;
382    XMLBuffer                   fContent;
383    RefVectorOf<KVStringPair>*  fRawAttrList;
384    DTDValidator*               fDTDValidator;
385    SchemaValidator*            fSchemaValidator;
386    DTDGrammar*                 fDTDGrammar;
387    XPathMatcherStack*          fMatcherStack;
388    ValueStoreCache*            fValueStoreCache;
389    FieldActivator*             fFieldActivator;
390    ValueVectorOf<XMLCh*>*      fLocationPairs;
391    NameIdPool<DTDElementDecl>* fDTDElemNonDeclPool;
392    RefHash3KeysIdPool<SchemaElementDecl>* fSchemaElemNonDeclPool;
393    unsigned int                            fElemCount;
394    RefHashTableOf<unsigned int>*           fAttDefRegistry;
395    RefHashTableOf<unsigned int>*           fUndeclaredAttrRegistry;
396    RefHash2KeysTableOf<unsigned int>*      fUndeclaredAttrRegistryNS;
397    PSVIAttributeList *                     fPSVIAttrList;
398    XSModel*                                fModel;
399    PSVIElement*                            fPSVIElement;
400    ValueStackOf<bool>*                     fErrorStack;
401    PSVIElemContext                         fPSVIElemContext;
402};
403
404inline const XMLCh* IGXMLScanner::getName() const
405{
406    return XMLUni::fgIGXMLScanner;
407}
408
409
410XERCES_CPP_NAMESPACE_END
411
412#endif
Note: See TracBrowser for help on using the repository browser.