source: NonGTP/Xerces/xercesc/validators/common/DFAContentModel.hpp @ 188

Revision 188, 14.0 KB checked in by mattausch, 20 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Log: DFAContentModel.hpp,v $
59 * Revision 1.6  2003/12/17 00:18:38  cargilld
60 * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data.
61 *
62 * Revision 1.5  2003/05/16 21:43:20  knoaman
63 * Memory manager implementation: Modify constructors to pass in the memory manager.
64 *
65 * Revision 1.4  2003/05/15 18:48:27  knoaman
66 * Partial implementation of the configurable memory manager.
67 *
68 * Revision 1.3  2003/03/07 18:16:57  tng
69 * Return a reference instead of void for operator=
70 *
71 * Revision 1.2  2002/11/04 14:54:58  tng
72 * C++ Namespace Support.
73 *
74 * Revision 1.1.1.1  2002/02/01 22:22:38  peiyongz
75 * sane_include
76 *
77 * Revision 1.13  2001/11/21 14:30:13  knoaman
78 * Fix for UPA checking.
79 *
80 * Revision 1.12  2001/08/24 12:48:48  tng
81 * Schema: AllContentModel
82 *
83 * Revision 1.11  2001/08/21 16:06:11  tng
84 * Schema: Unique Particle Attribution Constraint Checking.
85 *
86 * Revision 1.10  2001/08/13 15:06:39  knoaman
87 * update <any> validation.
88 *
89 * Revision 1.9  2001/06/13 20:50:55  peiyongz
90 * fIsMixed: to handle mixed Content Model
91 *
92 * Revision 1.8  2001/05/11 13:27:18  tng
93 * Copyright update.
94 *
95 * Revision 1.7  2001/05/03 21:02:30  tng
96 * Schema: Add SubstitutionGroupComparator and update exception messages.  By Pei Yong Zhang.
97 *
98 * Revision 1.6  2001/04/19 18:17:30  tng
99 * Schema: SchemaValidator update, and use QName in Content Model
100 *
101 * Revision 1.5  2001/03/21 21:56:27  tng
102 * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
103 *
104 * Revision 1.4  2001/03/21 19:29:55  tng
105 * Schema: Content Model Updates, by Pei Yong Zhang.
106 *
107 * Revision 1.3  2001/02/27 18:32:32  tng
108 * Schema: Use XMLElementDecl instead of DTDElementDecl in Content Model.
109 *
110 * Revision 1.2  2001/02/27 14:48:52  tng
111 * Schema: Add CMAny and ContentLeafNameTypeVector, by Pei Yong Zhang
112 *
113 * Revision 1.1  2001/02/16 14:17:29  tng
114 * Schema: Move the common Content Model files that are shared by DTD
115 * and schema from 'DTD' folder to 'common' folder.  By Pei Yong Zhang.
116 *
117 * Revision 1.4  2000/03/02 19:55:38  roddey
118 * This checkin includes many changes done while waiting for the
119 * 1.1.0 code to be finished. I can't list them all here, but a list is
120 * available elsewhere.
121 *
122 * Revision 1.3  2000/02/24 20:16:48  abagchi
123 * Swat for removing Log from API docs
124 *
125 * Revision 1.2  2000/02/09 21:42:37  abagchi
126 * Copyright swat
127 *
128 * Revision 1.1.1.1  1999/11/09 01:03:19  twl
129 * Initial checkin
130 *
131 * Revision 1.2  1999/11/08 20:45:38  rahul
132 * Swat for adding in Product name and CVS comment log variable.
133 *
134 */
135
136#if !defined(DFACONTENTMODEL_HPP)
137#define DFACONTENTMODEL_HPP
138
139#include <xercesc/util/XercesDefs.hpp>
140#include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
141#include <xercesc/framework/XMLContentModel.hpp>
142#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
143
144XERCES_CPP_NAMESPACE_BEGIN
145
146class ContentSpecNode;
147class CMLeaf;
148class CMNode;
149class CMStateSet;
150
151//
152//  DFAContentModel is the heavy weight derivative of ContentModel that does
153//  all of the non-trivial element content validation. This guy does the full
154//  bore regular expression to DFA conversion to create a DFA that it then
155//  uses in its validation algorithm.
156//
157//  NOTE:   Upstream work insures that this guy will never see a content model
158//          with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
159//          via the MixedContentModel class, since mixed models are very
160//          constrained in form and easily handled via a special case. This
161//          also makes our life much easier here.
162//
163class DFAContentModel : public XMLContentModel
164{
165public:
166    // -----------------------------------------------------------------------
167    //  Constructors and Destructor
168    // -----------------------------------------------------------------------
169    DFAContentModel
170    (
171          const bool             dtd
172        , ContentSpecNode* const elemContentSpec
173        , MemoryManager* const   manager = XMLPlatformUtils::fgMemoryManager
174    );
175    DFAContentModel
176    (
177          const bool             dtd
178        , ContentSpecNode* const elemContentSpec
179        , const bool             isMixed
180        , MemoryManager* const   manager
181    );
182
183    virtual ~DFAContentModel();
184
185
186    // -----------------------------------------------------------------------
187    //  Implementation of the virtual content model interface
188    // -----------------------------------------------------------------------
189    virtual int validateContent
190    (
191        QName** const         children
192      , const unsigned int    childCount
193      , const unsigned int    emptyNamespaceId
194    ) const;
195
196    virtual int validateContentSpecial
197    (
198        QName** const           children
199      , const unsigned int      childCount
200      , const unsigned int      emptyNamespaceId
201      , GrammarResolver*  const pGrammarResolver
202      , XMLStringPool*    const pStringPool
203    ) const;
204
205    virtual void checkUniqueParticleAttribution
206    (
207        SchemaGrammar*    const pGrammar
208      , GrammarResolver*  const pGrammarResolver
209      , XMLStringPool*    const pStringPool
210      , XMLValidator*     const pValidator
211      , unsigned int*     const pContentSpecOrgURI
212    ) ;
213
214    virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ;
215
216    virtual unsigned int getNextState(const unsigned int currentState,
217                                      const unsigned int elementIndex) const;
218
219private :
220    // -----------------------------------------------------------------------
221    //  Unimplemented constructors and operators
222    // -----------------------------------------------------------------------
223    DFAContentModel();
224    DFAContentModel(const DFAContentModel&);
225    DFAContentModel& operator=(const DFAContentModel&);
226
227
228    // -----------------------------------------------------------------------
229    //  Private helper methods
230    // -----------------------------------------------------------------------
231    void buildDFA(ContentSpecNode* const curNode);
232    CMNode* buildSyntaxTree(ContentSpecNode* const curNode);
233    void calcFollowList(CMNode* const curNode);
234    unsigned int* makeDefStateList() const;
235    int postTreeBuildInit
236    (
237                CMNode* const   nodeCur
238        , const unsigned int    curIndex
239    );
240
241
242    // -----------------------------------------------------------------------
243    //  Private data members
244    //
245    //  fElemMap
246    //  fElemMapSize
247    //      This is the map of unique input symbol elements to indices into
248    //      each state's per-input symbol transition table entry. This is part
249    //      of the built DFA information that must be kept around to do the
250    //      actual validation.
251    //
252    //  fElemMapType
253    //      This is a map of whether the element map contains information
254    //      related to ANY models.
255    //
256    //  fEmptyOk
257    //      This is an optimization. While building the transition table we
258    //      can see whether this content model would approve of an empty
259    //      content (which could happen if everything was optional.) So we
260    //      set this flag and short circuit that check, which would otherwise
261    //      be ugly and time consuming if we tried to determine it at each
262    //      validation call.
263    //
264    //  fEOCPos
265    //      The NFA position of the special EOC (end of content) node. This
266    //      is saved away since its used during the DFA build.
267    //
268    //  fFinalStateFlags
269    //      This is an array of booleans, one per state (there are
270    //      fTransTableSize states in the DFA) that indicates whether that
271    //      state is a final state.
272    //
273    //  fFollowList
274    //      The list of follow positions for each NFA position (i.e. for each
275    //      non-epsilon leaf node.) This is only used during the building of
276    //      the DFA, and is let go afterwards.
277    //
278    //  fHeadNode
279    //      This is the head node of our intermediate representation. It is
280    //      only non-null during the building of the DFA (just so that it
281    //      does not have to be passed all around.) Once the DFA is built,
282    //      this is no longer required so its deleted.
283    //
284    //  fLeafCount
285    //      The count of leaf nodes. This is an important number that set some
286    //      limits on the sizes of data structures in the DFA process.
287    //
288    //  fLeafList
289    //      An array of non-epsilon leaf nodes, which is used during the DFA
290    //      build operation, then dropped. These are just references to nodes
291    //      pointed to by fHeadNode, so we don't have to clean them up, just
292    //      the actually leaf list array itself needs cleanup.
293    //
294    //  fLeafListType
295    //      Array mapping ANY types to the leaf list.
296    //
297    //  fTransTable
298    //  fTransTableSize
299    //      This is the transition table that is the main by product of all
300    //      of the effort here. It is an array of arrays of ints. The first
301    //      dimension is the number of states we end up with in the DFA. The
302    //      second dimensions is the number of unique elements in the content
303    //      model (fElemMapSize). Each entry in the second dimension indicates
304    //      the new state given that input for the first dimension's start
305    //      state.
306    //
307    //      The fElemMap array handles mapping from element indexes to
308    //      positions in the second dimension of the transition table.
309    //
310    //      fTransTableSize is the number of valid entries in the transition
311    //      table, and in the other related tables such as fFinalStateFlags.
312    //
313    //  fDTD
314    //      Boolean to allow DTDs to validate even with namespace support.
315    //
316    //  fIsMixed
317    //      DFA ContentModel with mixed PCDATA.
318    // -----------------------------------------------------------------------
319    QName**                 fElemMap;
320    ContentSpecNode::NodeTypes  *fElemMapType;
321    unsigned int            fElemMapSize;
322    bool                    fEmptyOk;
323    unsigned int            fEOCPos;
324    bool*                   fFinalStateFlags;
325    CMStateSet**            fFollowList;
326    CMNode*                 fHeadNode;
327    unsigned int            fLeafCount;
328    CMLeaf**                fLeafList;
329    ContentSpecNode::NodeTypes  *fLeafListType;
330    unsigned int**          fTransTable;
331    unsigned int            fTransTableSize;
332    bool                    fDTD;
333    bool                    fIsMixed;
334    ContentLeafNameTypeVector *fLeafNameTypeVector;
335    MemoryManager*             fMemoryManager;
336};
337
338
339inline unsigned int
340DFAContentModel::getNextState(const unsigned int currentState,
341                              const unsigned int elementIndex) const {
342
343    if (currentState == XMLContentModel::gInvalidTrans) {
344        return XMLContentModel::gInvalidTrans;
345    }
346
347    if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) {
348        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
349    }
350
351    return fTransTable[currentState][elementIndex];
352}
353
354XERCES_CPP_NAMESPACE_END
355
356#endif
357
Note: See TracBrowser for help on using the repository browser.