source: NonGTP/Xerces/xerces-c_2_8_0/include/xercesc/validators/common/DFAContentModel.hpp @ 2674

Revision 2674, 9.3 KB checked in by mattausch, 16 years ago (diff)
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: DFAContentModel.hpp 568078 2007-08-21 11:43:25Z amassari $
20 */
21
22#if !defined(DFACONTENTMODEL_HPP)
23#define DFACONTENTMODEL_HPP
24
25#include <xercesc/util/XercesDefs.hpp>
26#include <xercesc/util/ArrayIndexOutOfBoundsException.hpp>
27#include <xercesc/framework/XMLContentModel.hpp>
28#include <xercesc/validators/common/ContentLeafNameTypeVector.hpp>
29
30XERCES_CPP_NAMESPACE_BEGIN
31
32class ContentSpecNode;
33class CMLeaf;
34class CMNode;
35class CMStateSet;
36
37//
38//  DFAContentModel is the heavy weight derivative of ContentModel that does
39//  all of the non-trivial element content validation. This guy does the full
40//  bore regular expression to DFA conversion to create a DFA that it then
41//  uses in its validation algorithm.
42//
43//  NOTE:   Upstream work insures that this guy will never see a content model
44//          with PCDATA in it. Any model with PCDATA is 'mixed' and is handled
45//          via the MixedContentModel class, since mixed models are very
46//          constrained in form and easily handled via a special case. This
47//          also makes our life much easier here.
48//
49class DFAContentModel : public XMLContentModel
50{
51public:
52    // -----------------------------------------------------------------------
53    //  Constructors and Destructor
54    // -----------------------------------------------------------------------
55    DFAContentModel
56    (
57          const bool             dtd
58        , ContentSpecNode* const elemContentSpec
59        , MemoryManager* const   manager = XMLPlatformUtils::fgMemoryManager
60    );
61    DFAContentModel
62    (
63          const bool             dtd
64        , ContentSpecNode* const elemContentSpec
65        , const bool             isMixed
66        , MemoryManager* const   manager
67    );
68
69    virtual ~DFAContentModel();
70
71
72    // -----------------------------------------------------------------------
73    //  Implementation of the virtual content model interface
74    // -----------------------------------------------------------------------
75    virtual int validateContent
76    (
77        QName** const         children
78      , const unsigned int    childCount
79      , const unsigned int    emptyNamespaceId
80    ) const;
81
82    virtual int validateContentSpecial
83    (
84        QName** const           children
85      , const unsigned int      childCount
86      , const unsigned int      emptyNamespaceId
87      , GrammarResolver*  const pGrammarResolver
88      , XMLStringPool*    const pStringPool
89    ) const;
90
91    virtual void checkUniqueParticleAttribution
92    (
93        SchemaGrammar*    const pGrammar
94      , GrammarResolver*  const pGrammarResolver
95      , XMLStringPool*    const pStringPool
96      , XMLValidator*     const pValidator
97      , unsigned int*     const pContentSpecOrgURI
98      , const XMLCh*            pComplexTypeName = 0
99    ) ;
100
101    virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ;
102
103    virtual unsigned int getNextState(const unsigned int currentState,
104                                      const unsigned int elementIndex) const;
105
106private :
107    // -----------------------------------------------------------------------
108    //  Unimplemented constructors and operators
109    // -----------------------------------------------------------------------
110    DFAContentModel();
111    DFAContentModel(const DFAContentModel&);
112    DFAContentModel& operator=(const DFAContentModel&);
113
114
115    // -----------------------------------------------------------------------
116    //  Private helper methods
117    // -----------------------------------------------------------------------
118    void buildDFA(ContentSpecNode* const curNode);
119    CMNode* buildSyntaxTree(ContentSpecNode* const curNode);
120    void calcFollowList(CMNode* const curNode);
121    unsigned int* makeDefStateList() const;
122    int postTreeBuildInit
123    (
124                CMNode* const   nodeCur
125        , const unsigned int    curIndex
126    );
127
128
129    // -----------------------------------------------------------------------
130    //  Private data members
131    //
132    //  fElemMap
133    //  fElemMapSize
134    //      This is the map of unique input symbol elements to indices into
135    //      each state's per-input symbol transition table entry. This is part
136    //      of the built DFA information that must be kept around to do the
137    //      actual validation.
138    //
139    //  fElemMapType
140    //      This is a map of whether the element map contains information
141    //      related to ANY models.
142    //
143    //  fEmptyOk
144    //      This is an optimization. While building the transition table we
145    //      can see whether this content model would approve of an empty
146    //      content (which could happen if everything was optional.) So we
147    //      set this flag and short circuit that check, which would otherwise
148    //      be ugly and time consuming if we tried to determine it at each
149    //      validation call.
150    //
151    //  fEOCPos
152    //      The NFA position of the special EOC (end of content) node. This
153    //      is saved away since its used during the DFA build.
154    //
155    //  fFinalStateFlags
156    //      This is an array of booleans, one per state (there are
157    //      fTransTableSize states in the DFA) that indicates whether that
158    //      state is a final state.
159    //
160    //  fFollowList
161    //      The list of follow positions for each NFA position (i.e. for each
162    //      non-epsilon leaf node.) This is only used during the building of
163    //      the DFA, and is let go afterwards.
164    //
165    //  fHeadNode
166    //      This is the head node of our intermediate representation. It is
167    //      only non-null during the building of the DFA (just so that it
168    //      does not have to be passed all around.) Once the DFA is built,
169    //      this is no longer required so its deleted.
170    //
171    //  fLeafCount
172    //      The count of leaf nodes. This is an important number that set some
173    //      limits on the sizes of data structures in the DFA process.
174    //
175    //  fLeafList
176    //      An array of non-epsilon leaf nodes, which is used during the DFA
177    //      build operation, then dropped. These are just references to nodes
178    //      pointed to by fHeadNode, so we don't have to clean them up, just
179    //      the actually leaf list array itself needs cleanup.
180    //
181    //  fLeafListType
182    //      Array mapping ANY types to the leaf list.
183    //
184    //  fTransTable
185    //  fTransTableSize
186    //      This is the transition table that is the main by product of all
187    //      of the effort here. It is an array of arrays of ints. The first
188    //      dimension is the number of states we end up with in the DFA. The
189    //      second dimensions is the number of unique elements in the content
190    //      model (fElemMapSize). Each entry in the second dimension indicates
191    //      the new state given that input for the first dimension's start
192    //      state.
193    //
194    //      The fElemMap array handles mapping from element indexes to
195    //      positions in the second dimension of the transition table.
196    //
197    //      fTransTableSize is the number of valid entries in the transition
198    //      table, and in the other related tables such as fFinalStateFlags.
199    //
200    //  fDTD
201    //      Boolean to allow DTDs to validate even with namespace support.
202    //
203    //  fIsMixed
204    //      DFA ContentModel with mixed PCDATA.
205    // -----------------------------------------------------------------------
206    QName**                 fElemMap;
207    ContentSpecNode::NodeTypes  *fElemMapType;
208    unsigned int            fElemMapSize;
209    bool                    fEmptyOk;
210    unsigned int            fEOCPos;
211    bool*                   fFinalStateFlags;
212    CMStateSet**            fFollowList;
213    CMNode*                 fHeadNode;
214    unsigned int            fLeafCount;
215    CMLeaf**                fLeafList;
216    ContentSpecNode::NodeTypes  *fLeafListType;
217    unsigned int**          fTransTable;
218    unsigned int            fTransTableSize;
219    bool                    fDTD;
220    bool                    fIsMixed;
221    ContentLeafNameTypeVector *fLeafNameTypeVector;
222    MemoryManager*             fMemoryManager;
223};
224
225
226inline unsigned int
227DFAContentModel::getNextState(const unsigned int currentState,
228                              const unsigned int elementIndex) const {
229
230    if (currentState == XMLContentModel::gInvalidTrans) {
231        return XMLContentModel::gInvalidTrans;
232    }
233
234    if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) {
235        ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager);
236    }
237
238    return fTransTable[currentState][elementIndex];
239}
240
241XERCES_CPP_NAMESPACE_END
242
243#endif
244
Note: See TracBrowser for help on using the repository browser.