source: NonGTP/Xerces/xerces/include/xercesc/validators/common/DFAContentModel.hpp @ 358

Revision 358, 12.2 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2001,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Log: DFAContentModel.hpp,v $
19 * Revision 1.8  2004/09/16 13:32:03  amassari
20 * Updated error message for UPA to also state the complex type that is failing the test
21 *
22 * Revision 1.7  2004/09/08 13:56:51  peiyongz
23 * Apache License Version 2.0
24 *
25 * Revision 1.6  2003/12/17 00:18:38  cargilld
26 * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data.
27 *
28 * Revision 1.5  2003/05/16 21:43:20  knoaman
29 * Memory manager implementation: Modify constructors to pass in the memory manager.
30 *
31 * Revision 1.4  2003/05/15 18:48:27  knoaman
32 * Partial implementation of the configurable memory manager.
33 *
34 * Revision 1.3  2003/03/07 18:16:57  tng
35 * Return a reference instead of void for operator=
36 *
37 * Revision 1.2  2002/11/04 14:54:58  tng
38 * C++ Namespace Support.
39 *
40 * Revision 1.1.1.1  2002/02/01 22:22:38  peiyongz
41 * sane_include
42 *
43 * Revision 1.13  2001/11/21 14:30:13  knoaman
44 * Fix for UPA checking.
45 *
46 * Revision 1.12  2001/08/24 12:48:48  tng
47 * Schema: AllContentModel
48 *
49 * Revision 1.11  2001/08/21 16:06:11  tng
50 * Schema: Unique Particle Attribution Constraint Checking.
51 *
52 * Revision 1.10  2001/08/13 15:06:39  knoaman
53 * update <any> validation.
54 *
55 * Revision 1.9  2001/06/13 20:50:55  peiyongz
56 * fIsMixed: to handle mixed Content Model
57 *
58 * Revision 1.8  2001/05/11 13:27:18  tng
59 * Copyright update.
60 *
61 * Revision 1.7  2001/05/03 21:02:30  tng
62 * Schema: Add SubstitutionGroupComparator and update exception messages.  By Pei Yong Zhang.
63 *
64 * Revision 1.6  2001/04/19 18:17:30  tng
65 * Schema: SchemaValidator update, and use QName in Content Model
66 *
67 * Revision 1.5  2001/03/21 21:56:27  tng
68 * Schema: Add Schema Grammar, Schema Validator, and split the DTDValidator into DTDValidator, DTDScanner, and DTDGrammar.
69 *
70 * Revision 1.4  2001/03/21 19:29:55  tng
71 * Schema: Content Model Updates, by Pei Yong Zhang.
72 *
73 * Revision 1.3  2001/02/27 18:32:32  tng
74 * Schema: Use XMLElementDecl instead of DTDElementDecl in Content Model.
75 *
76 * Revision 1.2  2001/02/27 14:48:52  tng
77 * Schema: Add CMAny and ContentLeafNameTypeVector, by Pei Yong Zhang
78 *
79 * Revision 1.1  2001/02/16 14:17:29  tng
80 * Schema: Move the common Content Model files that are shared by DTD
81 * and schema from 'DTD' folder to 'common' folder.  By Pei Yong Zhang.
82 *
83 * Revision 1.4  2000/03/02 19:55:38  roddey
84 * This checkin includes many changes done while waiting for the
85 * 1.1.0 code to be finished. I can't list them all here, but a list is
86 * available elsewhere.
87 *
88 * Revision 1.3  2000/02/24 20:16:48  abagchi
89 * Swat for removing Log from API docs
90 *
91 * Revision 1.2  2000/02/09 21:42:37  abagchi
92 * Copyright swat
93 *
94 * Revision 1.1.1.1  1999/11/09 01:03:19  twl
95 * Initial checkin
96 *
97 * Revision 1.2  1999/11/08 20:45:38  rahul
98 * Swat for adding in Product name and CVS comment log variable.
99 *
100 */
101
102#if !defined(DFACONTENTMODEL_HPP)
103#define DFACONTENTMODEL_HPP
104
105#include <xercesc/util/XercesDefs.hpp>
106#include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
107#include <xercesc/framework/XMLContentModel.hpp>
108#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
109
110XERCES_CPP_NAMESPACE_BEGIN
111
112class ContentSpecNode;
113class CMLeaf;
114class CMNode;
115class CMStateSet;
116
//
//  DFAContentModel is the heavyweight derivative of ContentModel that does
//  all of the non-trivial element content validation. This guy does the full
//  bore regular expression to DFA conversion to create a DFA that it then
//  uses in its validation algorithm.
//
//  NOTE:   Upstream work ensures that this guy will never see a content model
//          with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
//          via the MixedContentModel class, since mixed models are very
//          constrained in form and easily handled via a special case. This
//          also makes our life much easier here.
//
129class DFAContentModel : public XMLContentModel
130{
131public:
132    // -----------------------------------------------------------------------
133    //  Constructors and Destructor
134    // -----------------------------------------------------------------------
135    DFAContentModel
136    (
137          const bool             dtd
138        , ContentSpecNode* const elemContentSpec
139        , MemoryManager* const   manager = XMLPlatformUtils::fgMemoryManager
140    );
141    DFAContentModel
142    (
143          const bool             dtd
144        , ContentSpecNode* const elemContentSpec
145        , const bool             isMixed
146        , MemoryManager* const   manager
147    );
148
149    virtual ~DFAContentModel();
150
151
152    // -----------------------------------------------------------------------
153    //  Implementation of the virtual content model interface
154    // -----------------------------------------------------------------------
155    virtual int validateContent
156    (
157        QName** const         children
158      , const unsigned int    childCount
159      , const unsigned int    emptyNamespaceId
160    ) const;
161
162    virtual int validateContentSpecial
163    (
164        QName** const           children
165      , const unsigned int      childCount
166      , const unsigned int      emptyNamespaceId
167      , GrammarResolver*  const pGrammarResolver
168      , XMLStringPool*    const pStringPool
169    ) const;
170
171    virtual void checkUniqueParticleAttribution
172    (
173        SchemaGrammar*    const pGrammar
174      , GrammarResolver*  const pGrammarResolver
175      , XMLStringPool*    const pStringPool
176      , XMLValidator*     const pValidator
177      , unsigned int*     const pContentSpecOrgURI
178      , const XMLCh*            pComplexTypeName = 0
179    ) ;
180
181    virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ;
182
183    virtual unsigned int getNextState(const unsigned int currentState,
184                                      const unsigned int elementIndex) const;
185
186private :
187    // -----------------------------------------------------------------------
188    //  Unimplemented constructors and operators
189    // -----------------------------------------------------------------------
190    DFAContentModel();
191    DFAContentModel(const DFAContentModel&);
192    DFAContentModel& operator=(const DFAContentModel&);
193
194
195    // -----------------------------------------------------------------------
196    //  Private helper methods
197    // -----------------------------------------------------------------------
198    void buildDFA(ContentSpecNode* const curNode);
199    CMNode* buildSyntaxTree(ContentSpecNode* const curNode);
200    void calcFollowList(CMNode* const curNode);
201    unsigned int* makeDefStateList() const;
202    int postTreeBuildInit
203    (
204                CMNode* const   nodeCur
205        , const unsigned int    curIndex
206    );
207
208
209    // -----------------------------------------------------------------------
210    //  Private data members
211    //
212    //  fElemMap
213    //  fElemMapSize
214    //      This is the map of unique input symbol elements to indices into
215    //      each state's per-input symbol transition table entry. This is part
216    //      of the built DFA information that must be kept around to do the
217    //      actual validation.
218    //
219    //  fElemMapType
220    //      This is a map of whether the element map contains information
221    //      related to ANY models.
222    //
223    //  fEmptyOk
224    //      This is an optimization. While building the transition table we
225    //      can see whether this content model would approve of an empty
226    //      content (which could happen if everything was optional.) So we
227    //      set this flag and short circuit that check, which would otherwise
228    //      be ugly and time consuming if we tried to determine it at each
229    //      validation call.
230    //
231    //  fEOCPos
232    //      The NFA position of the special EOC (end of content) node. This
233    //      is saved away since its used during the DFA build.
234    //
235    //  fFinalStateFlags
236    //      This is an array of booleans, one per state (there are
237    //      fTransTableSize states in the DFA) that indicates whether that
238    //      state is a final state.
239    //
240    //  fFollowList
241    //      The list of follow positions for each NFA position (i.e. for each
242    //      non-epsilon leaf node.) This is only used during the building of
243    //      the DFA, and is let go afterwards.
244    //
245    //  fHeadNode
246    //      This is the head node of our intermediate representation. It is
247    //      only non-null during the building of the DFA (just so that it
248    //      does not have to be passed all around.) Once the DFA is built,
249    //      this is no longer required so its deleted.
250    //
251    //  fLeafCount
252    //      The count of leaf nodes. This is an important number that set some
253    //      limits on the sizes of data structures in the DFA process.
254    //
255    //  fLeafList
256    //      An array of non-epsilon leaf nodes, which is used during the DFA
257    //      build operation, then dropped. These are just references to nodes
258    //      pointed to by fHeadNode, so we don't have to clean them up, just
259    //      the actually leaf list array itself needs cleanup.
260    //
261    //  fLeafListType
262    //      Array mapping ANY types to the leaf list.
263    //
264    //  fTransTable
265    //  fTransTableSize
266    //      This is the transition table that is the main by product of all
267    //      of the effort here. It is an array of arrays of ints. The first
268    //      dimension is the number of states we end up with in the DFA. The
269    //      second dimensions is the number of unique elements in the content
270    //      model (fElemMapSize). Each entry in the second dimension indicates
271    //      the new state given that input for the first dimension's start
272    //      state.
273    //
274    //      The fElemMap array handles mapping from element indexes to
275    //      positions in the second dimension of the transition table.
276    //
277    //      fTransTableSize is the number of valid entries in the transition
278    //      table, and in the other related tables such as fFinalStateFlags.
279    //
280    //  fDTD
281    //      Boolean to allow DTDs to validate even with namespace support.
282    //
283    //  fIsMixed
284    //      DFA ContentModel with mixed PCDATA.
285    // -----------------------------------------------------------------------
286    QName**                 fElemMap;
287    ContentSpecNode::NodeTypes  *fElemMapType;
288    unsigned int            fElemMapSize;
289    bool                    fEmptyOk;
290    unsigned int            fEOCPos;
291    bool*                   fFinalStateFlags;
292    CMStateSet**            fFollowList;
293    CMNode*                 fHeadNode;
294    unsigned int            fLeafCount;
295    CMLeaf**                fLeafList;
296    ContentSpecNode::NodeTypes  *fLeafListType;
297    unsigned int**          fTransTable;
298    unsigned int            fTransTableSize;
299    bool                    fDTD;
300    bool                    fIsMixed;
301    ContentLeafNameTypeVector *fLeafNameTypeVector;
302    MemoryManager*             fMemoryManager;
303};
304
305
306inline unsigned int
307DFAContentModel::getNextState(const unsigned int currentState,
308                              const unsigned int elementIndex) const {
309
310    if (currentState == XMLContentModel::gInvalidTrans) {
311        return XMLContentModel::gInvalidTrans;
312    }
313
314    if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) {
315        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
316    }
317
318    return fTransTable[currentState][elementIndex];
319}
320
321XERCES_CPP_NAMESPACE_END
322
323#endif
324
Note: See TracBrowser for help on using the repository browser.