1 | /* |
---|
2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
3 | * contributor license agreements. See the NOTICE file distributed with |
---|
4 | * this work for additional information regarding copyright ownership. |
---|
5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
6 | * (the "License"); you may not use this file except in compliance with |
---|
7 | * the License. You may obtain a copy of the License at |
---|
8 | * |
---|
9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
10 | * |
---|
11 | * Unless required by applicable law or agreed to in writing, software |
---|
12 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
14 | * See the License for the specific language governing permissions and |
---|
15 | * limitations under the License. |
---|
16 | */ |
---|
17 | |
---|
18 | /* |
---|
19 | * $Id: DFAContentModel.hpp 568078 2007-08-21 11:43:25Z amassari $ |
---|
20 | */ |
---|
21 | |
---|
22 | #if !defined(DFACONTENTMODEL_HPP) |
---|
23 | #define DFACONTENTMODEL_HPP |
---|
24 | |
---|
25 | #include <xercesc/util/XercesDefs.hpp> |
---|
26 | #include <xercesc/util/ArrayIndexOutOfBoundsException.hpp> |
---|
27 | #include <xercesc/framework/XMLContentModel.hpp> |
---|
28 | #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> |
---|
29 | |
---|
30 | XERCES_CPP_NAMESPACE_BEGIN |
---|
31 | |
---|
32 | class ContentSpecNode; |
---|
33 | class CMLeaf; |
---|
34 | class CMNode; |
---|
35 | class CMStateSet; |
---|
36 | |
---|
37 | // |
---|
38 | // DFAContentModel is the heavy weight derivative of ContentModel that does |
---|
39 | // all of the non-trivial element content validation. This guy does the full |
---|
40 | // bore regular expression to DFA conversion to create a DFA that it then |
---|
41 | // uses in its validation algorithm. |
---|
42 | // |
---|
43 | // NOTE: Upstream work ensures that this guy will never see a content model |
---|
44 | // with PCDATA in it. Any model with PCDATA is 'mixed' and is handled |
---|
45 | // via the MixedContentModel class, since mixed models are very |
---|
46 | // constrained in form and easily handled via a special case. This |
---|
47 | // also makes our life much easier here. |
---|
48 | // |
---|
49 | class DFAContentModel : public XMLContentModel |
---|
50 | { |
---|
51 | public: |
---|
52 | // ----------------------------------------------------------------------- |
---|
53 | // Constructors and Destructor |
---|
54 | // ----------------------------------------------------------------------- |
---|
55 | DFAContentModel |
---|
56 | ( |
---|
57 | const bool dtd |
---|
58 | , ContentSpecNode* const elemContentSpec |
---|
59 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
---|
60 | ); |
---|
61 | DFAContentModel |
---|
62 | ( |
---|
63 | const bool dtd |
---|
64 | , ContentSpecNode* const elemContentSpec |
---|
65 | , const bool isMixed |
---|
66 | , MemoryManager* const manager |
---|
67 | ); |
---|
68 | |
---|
69 | virtual ~DFAContentModel(); |
---|
70 | |
---|
71 | |
---|
72 | // ----------------------------------------------------------------------- |
---|
73 | // Implementation of the virtual content model interface |
---|
74 | // ----------------------------------------------------------------------- |
---|
75 | virtual int validateContent |
---|
76 | ( |
---|
77 | QName** const children |
---|
78 | , const unsigned int childCount |
---|
79 | , const unsigned int emptyNamespaceId |
---|
80 | ) const; |
---|
81 | |
---|
82 | virtual int validateContentSpecial |
---|
83 | ( |
---|
84 | QName** const children |
---|
85 | , const unsigned int childCount |
---|
86 | , const unsigned int emptyNamespaceId |
---|
87 | , GrammarResolver* const pGrammarResolver |
---|
88 | , XMLStringPool* const pStringPool |
---|
89 | ) const; |
---|
90 | |
---|
91 | virtual void checkUniqueParticleAttribution |
---|
92 | ( |
---|
93 | SchemaGrammar* const pGrammar |
---|
94 | , GrammarResolver* const pGrammarResolver |
---|
95 | , XMLStringPool* const pStringPool |
---|
96 | , XMLValidator* const pValidator |
---|
97 | , unsigned int* const pContentSpecOrgURI |
---|
98 | , const XMLCh* pComplexTypeName = 0 |
---|
99 | ) ; |
---|
100 | |
---|
101 | virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ; |
---|
102 | |
---|
103 | virtual unsigned int getNextState(const unsigned int currentState, |
---|
104 | const unsigned int elementIndex) const; |
---|
105 | |
---|
106 | private : |
---|
107 | // ----------------------------------------------------------------------- |
---|
108 | // Unimplemented constructors and operators |
---|
109 | // ----------------------------------------------------------------------- |
---|
110 | DFAContentModel(); |
---|
111 | DFAContentModel(const DFAContentModel&); |
---|
112 | DFAContentModel& operator=(const DFAContentModel&); |
---|
113 | |
---|
114 | |
---|
115 | // ----------------------------------------------------------------------- |
---|
116 | // Private helper methods |
---|
117 | // ----------------------------------------------------------------------- |
---|
118 | void buildDFA(ContentSpecNode* const curNode); |
---|
119 | CMNode* buildSyntaxTree(ContentSpecNode* const curNode); |
---|
120 | void calcFollowList(CMNode* const curNode); |
---|
121 | unsigned int* makeDefStateList() const; |
---|
122 | int postTreeBuildInit |
---|
123 | ( |
---|
124 | CMNode* const nodeCur |
---|
125 | , const unsigned int curIndex |
---|
126 | ); |
---|
127 | |
---|
128 | |
---|
129 | // ----------------------------------------------------------------------- |
---|
130 | // Private data members |
---|
131 | // |
---|
132 | // fElemMap |
---|
133 | // fElemMapSize |
---|
134 | // This is the map of unique input symbol elements to indices into |
---|
135 | // each state's per-input symbol transition table entry. This is part |
---|
136 | // of the built DFA information that must be kept around to do the |
---|
137 | // actual validation. |
---|
138 | // |
---|
139 | // fElemMapType |
---|
140 | // This is a map of whether the element map contains information |
---|
141 | // related to ANY models. |
---|
142 | // |
---|
143 | // fEmptyOk |
---|
144 | // This is an optimization. While building the transition table we |
---|
145 | // can see whether this content model would approve of an empty |
---|
146 | // content (which could happen if everything was optional.) So we |
---|
147 | // set this flag and short circuit that check, which would otherwise |
---|
148 | // be ugly and time consuming if we tried to determine it at each |
---|
149 | // validation call. |
---|
150 | // |
---|
151 | // fEOCPos |
---|
152 | // The NFA position of the special EOC (end of content) node. This |
---|
153 | // is saved away since its used during the DFA build. |
---|
154 | // |
---|
155 | // fFinalStateFlags |
---|
156 | // This is an array of booleans, one per state (there are |
---|
157 | // fTransTableSize states in the DFA) that indicates whether that |
---|
158 | // state is a final state. |
---|
159 | // |
---|
160 | // fFollowList |
---|
161 | // The list of follow positions for each NFA position (i.e. for each |
---|
162 | // non-epsilon leaf node.) This is only used during the building of |
---|
163 | // the DFA, and is let go afterwards. |
---|
164 | // |
---|
165 | // fHeadNode |
---|
166 | // This is the head node of our intermediate representation. It is |
---|
167 | // only non-null during the building of the DFA (just so that it |
---|
168 | // does not have to be passed all around.) Once the DFA is built, |
---|
169 | // this is no longer required so its deleted. |
---|
170 | // |
---|
171 | // fLeafCount |
---|
172 | // The count of leaf nodes. This is an important number that set some |
---|
173 | // limits on the sizes of data structures in the DFA process. |
---|
174 | // |
---|
175 | // fLeafList |
---|
176 | // An array of non-epsilon leaf nodes, which is used during the DFA |
---|
177 | // build operation, then dropped. These are just references to nodes |
---|
178 | // pointed to by fHeadNode, so we don't have to clean them up, just |
---|
179 | // the actually leaf list array itself needs cleanup. |
---|
180 | // |
---|
181 | // fLeafListType |
---|
182 | // Array mapping ANY types to the leaf list. |
---|
183 | // |
---|
184 | // fTransTable |
---|
185 | // fTransTableSize |
---|
186 | // This is the transition table that is the main by product of all |
---|
187 | // of the effort here. It is an array of arrays of ints. The first |
---|
188 | // dimension is the number of states we end up with in the DFA. The |
---|
189 | // second dimensions is the number of unique elements in the content |
---|
190 | // model (fElemMapSize). Each entry in the second dimension indicates |
---|
191 | // the new state given that input for the first dimension's start |
---|
192 | // state. |
---|
193 | // |
---|
194 | // The fElemMap array handles mapping from element indexes to |
---|
195 | // positions in the second dimension of the transition table. |
---|
196 | // |
---|
197 | // fTransTableSize is the number of valid entries in the transition |
---|
198 | // table, and in the other related tables such as fFinalStateFlags. |
---|
199 | // |
---|
200 | // fDTD |
---|
201 | // Boolean to allow DTDs to validate even with namespace support. |
---|
202 | // |
---|
203 | // fIsMixed |
---|
204 | // DFA ContentModel with mixed PCDATA. |
---|
205 | // ----------------------------------------------------------------------- |
---|
206 | QName** fElemMap; |
---|
207 | ContentSpecNode::NodeTypes *fElemMapType; |
---|
208 | unsigned int fElemMapSize; |
---|
209 | bool fEmptyOk; |
---|
210 | unsigned int fEOCPos; |
---|
211 | bool* fFinalStateFlags; |
---|
212 | CMStateSet** fFollowList; |
---|
213 | CMNode* fHeadNode; |
---|
214 | unsigned int fLeafCount; |
---|
215 | CMLeaf** fLeafList; |
---|
216 | ContentSpecNode::NodeTypes *fLeafListType; |
---|
217 | unsigned int** fTransTable; |
---|
218 | unsigned int fTransTableSize; |
---|
219 | bool fDTD; |
---|
220 | bool fIsMixed; |
---|
221 | ContentLeafNameTypeVector *fLeafNameTypeVector; |
---|
222 | MemoryManager* fMemoryManager; |
---|
223 | }; |
---|
224 | |
---|
225 | |
---|
226 | inline unsigned int |
---|
227 | DFAContentModel::getNextState(const unsigned int currentState, |
---|
228 | const unsigned int elementIndex) const { |
---|
229 | |
---|
230 | if (currentState == XMLContentModel::gInvalidTrans) { |
---|
231 | return XMLContentModel::gInvalidTrans; |
---|
232 | } |
---|
233 | |
---|
234 | if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) { |
---|
235 | ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager); |
---|
236 | } |
---|
237 | |
---|
238 | return fTransTable[currentState][elementIndex]; |
---|
239 | } |
---|
240 | |
---|
241 | XERCES_CPP_NAMESPACE_END |
---|
242 | |
---|
243 | #endif |
---|
244 | |
---|