[2674] | 1 | /* |
---|
| 2 | * Licensed to the Apache Software Foundation (ASF) under one or more |
---|
| 3 | * contributor license agreements. See the NOTICE file distributed with |
---|
| 4 | * this work for additional information regarding copyright ownership. |
---|
| 5 | * The ASF licenses this file to You under the Apache License, Version 2.0 |
---|
| 6 | * (the "License"); you may not use this file except in compliance with |
---|
| 7 | * the License. You may obtain a copy of the License at |
---|
| 8 | * |
---|
| 9 | * http://www.apache.org/licenses/LICENSE-2.0 |
---|
| 10 | * |
---|
| 11 | * Unless required by applicable law or agreed to in writing, software |
---|
| 12 | * distributed under the License is distributed on an "AS IS" BASIS, |
---|
| 13 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
---|
| 14 | * See the License for the specific language governing permissions and |
---|
| 15 | * limitations under the License. |
---|
| 16 | */ |
---|
| 17 | |
---|
| 18 | /* |
---|
| 19 | * $Id: DFAContentModel.hpp 568078 2007-08-21 11:43:25Z amassari $ |
---|
| 20 | */ |
---|
| 21 | |
---|
| 22 | #if !defined(DFACONTENTMODEL_HPP) |
---|
| 23 | #define DFACONTENTMODEL_HPP |
---|
| 24 | |
---|
| 25 | #include <xercesc/util/XercesDefs.hpp> |
---|
| 26 | #include <xercesc/util/ArrayIndexOutOfBoundsException.hpp> |
---|
| 27 | #include <xercesc/framework/XMLContentModel.hpp> |
---|
| 28 | #include <xercesc/validators/common/ContentLeafNameTypeVector.hpp> |
---|
| 29 | |
---|
| 30 | XERCES_CPP_NAMESPACE_BEGIN |
---|
| 31 | |
---|
| 32 | class ContentSpecNode; |
---|
| 33 | class CMLeaf; |
---|
| 34 | class CMNode; |
---|
| 35 | class CMStateSet; |
---|
| 36 | |
---|
| 37 | // |
---|
| 38 | // DFAContentModel is the heavy weight derivative of ContentModel that does |
---|
| 39 | // all of the non-trivial element content validation. This guy does the full |
---|
| 40 | // bore regular expression to DFA conversion to create a DFA that it then |
---|
| 41 | // uses in its validation algorithm. |
---|
| 42 | // |
---|
| 43 | // NOTE: Upstream work insures that this guy will never see a content model |
---|
| 44 | // with PCDATA in it. Any model with PCDATA is 'mixed' and is handled |
---|
| 45 | // via the MixedContentModel class, since mixed models are very |
---|
| 46 | // constrained in form and easily handled via a special case. This |
---|
| 47 | // also makes our life much easier here. |
---|
| 48 | // |
---|
| 49 | class DFAContentModel : public XMLContentModel |
---|
| 50 | { |
---|
| 51 | public: |
---|
| 52 | // ----------------------------------------------------------------------- |
---|
| 53 | // Constructors and Destructor |
---|
| 54 | // ----------------------------------------------------------------------- |
---|
| 55 | DFAContentModel |
---|
| 56 | ( |
---|
| 57 | const bool dtd |
---|
| 58 | , ContentSpecNode* const elemContentSpec |
---|
| 59 | , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager |
---|
| 60 | ); |
---|
| 61 | DFAContentModel |
---|
| 62 | ( |
---|
| 63 | const bool dtd |
---|
| 64 | , ContentSpecNode* const elemContentSpec |
---|
| 65 | , const bool isMixed |
---|
| 66 | , MemoryManager* const manager |
---|
| 67 | ); |
---|
| 68 | |
---|
| 69 | virtual ~DFAContentModel(); |
---|
| 70 | |
---|
| 71 | |
---|
| 72 | // ----------------------------------------------------------------------- |
---|
| 73 | // Implementation of the virtual content model interface |
---|
| 74 | // ----------------------------------------------------------------------- |
---|
| 75 | virtual int validateContent |
---|
| 76 | ( |
---|
| 77 | QName** const children |
---|
| 78 | , const unsigned int childCount |
---|
| 79 | , const unsigned int emptyNamespaceId |
---|
| 80 | ) const; |
---|
| 81 | |
---|
| 82 | virtual int validateContentSpecial |
---|
| 83 | ( |
---|
| 84 | QName** const children |
---|
| 85 | , const unsigned int childCount |
---|
| 86 | , const unsigned int emptyNamespaceId |
---|
| 87 | , GrammarResolver* const pGrammarResolver |
---|
| 88 | , XMLStringPool* const pStringPool |
---|
| 89 | ) const; |
---|
| 90 | |
---|
| 91 | virtual void checkUniqueParticleAttribution |
---|
| 92 | ( |
---|
| 93 | SchemaGrammar* const pGrammar |
---|
| 94 | , GrammarResolver* const pGrammarResolver |
---|
| 95 | , XMLStringPool* const pStringPool |
---|
| 96 | , XMLValidator* const pValidator |
---|
| 97 | , unsigned int* const pContentSpecOrgURI |
---|
| 98 | , const XMLCh* pComplexTypeName = 0 |
---|
| 99 | ) ; |
---|
| 100 | |
---|
| 101 | virtual ContentLeafNameTypeVector* getContentLeafNameTypeVector() const ; |
---|
| 102 | |
---|
| 103 | virtual unsigned int getNextState(const unsigned int currentState, |
---|
| 104 | const unsigned int elementIndex) const; |
---|
| 105 | |
---|
| 106 | private : |
---|
| 107 | // ----------------------------------------------------------------------- |
---|
| 108 | // Unimplemented constructors and operators |
---|
| 109 | // ----------------------------------------------------------------------- |
---|
| 110 | DFAContentModel(); |
---|
| 111 | DFAContentModel(const DFAContentModel&); |
---|
| 112 | DFAContentModel& operator=(const DFAContentModel&); |
---|
| 113 | |
---|
| 114 | |
---|
| 115 | // ----------------------------------------------------------------------- |
---|
| 116 | // Private helper methods |
---|
| 117 | // ----------------------------------------------------------------------- |
---|
| 118 | void buildDFA(ContentSpecNode* const curNode); |
---|
| 119 | CMNode* buildSyntaxTree(ContentSpecNode* const curNode); |
---|
| 120 | void calcFollowList(CMNode* const curNode); |
---|
| 121 | unsigned int* makeDefStateList() const; |
---|
| 122 | int postTreeBuildInit |
---|
| 123 | ( |
---|
| 124 | CMNode* const nodeCur |
---|
| 125 | , const unsigned int curIndex |
---|
| 126 | ); |
---|
| 127 | |
---|
| 128 | |
---|
| 129 | // ----------------------------------------------------------------------- |
---|
| 130 | // Private data members |
---|
| 131 | // |
---|
| 132 | // fElemMap |
---|
| 133 | // fElemMapSize |
---|
| 134 | // This is the map of unique input symbol elements to indices into |
---|
| 135 | // each state's per-input symbol transition table entry. This is part |
---|
| 136 | // of the built DFA information that must be kept around to do the |
---|
| 137 | // actual validation. |
---|
| 138 | // |
---|
| 139 | // fElemMapType |
---|
| 140 | // This is a map of whether the element map contains information |
---|
| 141 | // related to ANY models. |
---|
| 142 | // |
---|
| 143 | // fEmptyOk |
---|
| 144 | // This is an optimization. While building the transition table we |
---|
| 145 | // can see whether this content model would approve of an empty |
---|
| 146 | // content (which could happen if everything was optional.) So we |
---|
| 147 | // set this flag and short circuit that check, which would otherwise |
---|
| 148 | // be ugly and time consuming if we tried to determine it at each |
---|
| 149 | // validation call. |
---|
| 150 | // |
---|
| 151 | // fEOCPos |
---|
| 152 | // The NFA position of the special EOC (end of content) node. This |
---|
| 153 | // is saved away since its used during the DFA build. |
---|
| 154 | // |
---|
| 155 | // fFinalStateFlags |
---|
| 156 | // This is an array of booleans, one per state (there are |
---|
| 157 | // fTransTableSize states in the DFA) that indicates whether that |
---|
| 158 | // state is a final state. |
---|
| 159 | // |
---|
| 160 | // fFollowList |
---|
| 161 | // The list of follow positions for each NFA position (i.e. for each |
---|
| 162 | // non-epsilon leaf node.) This is only used during the building of |
---|
| 163 | // the DFA, and is let go afterwards. |
---|
| 164 | // |
---|
| 165 | // fHeadNode |
---|
| 166 | // This is the head node of our intermediate representation. It is |
---|
| 167 | // only non-null during the building of the DFA (just so that it |
---|
| 168 | // does not have to be passed all around.) Once the DFA is built, |
---|
| 169 | // this is no longer required so its deleted. |
---|
| 170 | // |
---|
| 171 | // fLeafCount |
---|
| 172 | // The count of leaf nodes. This is an important number that set some |
---|
| 173 | // limits on the sizes of data structures in the DFA process. |
---|
| 174 | // |
---|
| 175 | // fLeafList |
---|
| 176 | // An array of non-epsilon leaf nodes, which is used during the DFA |
---|
| 177 | // build operation, then dropped. These are just references to nodes |
---|
| 178 | // pointed to by fHeadNode, so we don't have to clean them up, just |
---|
| 179 | // the actually leaf list array itself needs cleanup. |
---|
| 180 | // |
---|
| 181 | // fLeafListType |
---|
| 182 | // Array mapping ANY types to the leaf list. |
---|
| 183 | // |
---|
| 184 | // fTransTable |
---|
| 185 | // fTransTableSize |
---|
| 186 | // This is the transition table that is the main by product of all |
---|
| 187 | // of the effort here. It is an array of arrays of ints. The first |
---|
| 188 | // dimension is the number of states we end up with in the DFA. The |
---|
| 189 | // second dimensions is the number of unique elements in the content |
---|
| 190 | // model (fElemMapSize). Each entry in the second dimension indicates |
---|
| 191 | // the new state given that input for the first dimension's start |
---|
| 192 | // state. |
---|
| 193 | // |
---|
| 194 | // The fElemMap array handles mapping from element indexes to |
---|
| 195 | // positions in the second dimension of the transition table. |
---|
| 196 | // |
---|
| 197 | // fTransTableSize is the number of valid entries in the transition |
---|
| 198 | // table, and in the other related tables such as fFinalStateFlags. |
---|
| 199 | // |
---|
| 200 | // fDTD |
---|
| 201 | // Boolean to allow DTDs to validate even with namespace support. |
---|
| 202 | // |
---|
| 203 | // fIsMixed |
---|
| 204 | // DFA ContentModel with mixed PCDATA. |
---|
| 205 | // ----------------------------------------------------------------------- |
---|
| 206 | QName** fElemMap; |
---|
| 207 | ContentSpecNode::NodeTypes *fElemMapType; |
---|
| 208 | unsigned int fElemMapSize; |
---|
| 209 | bool fEmptyOk; |
---|
| 210 | unsigned int fEOCPos; |
---|
| 211 | bool* fFinalStateFlags; |
---|
| 212 | CMStateSet** fFollowList; |
---|
| 213 | CMNode* fHeadNode; |
---|
| 214 | unsigned int fLeafCount; |
---|
| 215 | CMLeaf** fLeafList; |
---|
| 216 | ContentSpecNode::NodeTypes *fLeafListType; |
---|
| 217 | unsigned int** fTransTable; |
---|
| 218 | unsigned int fTransTableSize; |
---|
| 219 | bool fDTD; |
---|
| 220 | bool fIsMixed; |
---|
| 221 | ContentLeafNameTypeVector *fLeafNameTypeVector; |
---|
| 222 | MemoryManager* fMemoryManager; |
---|
| 223 | }; |
---|
| 224 | |
---|
| 225 | |
---|
| 226 | inline unsigned int |
---|
| 227 | DFAContentModel::getNextState(const unsigned int currentState, |
---|
| 228 | const unsigned int elementIndex) const { |
---|
| 229 | |
---|
| 230 | if (currentState == XMLContentModel::gInvalidTrans) { |
---|
| 231 | return XMLContentModel::gInvalidTrans; |
---|
| 232 | } |
---|
| 233 | |
---|
| 234 | if (currentState >= fTransTableSize || elementIndex >= fElemMapSize) { |
---|
| 235 | ThrowXMLwithMemMgr(ArrayIndexOutOfBoundsException, XMLExcepts::Array_BadIndex, fMemoryManager); |
---|
| 236 | } |
---|
| 237 | |
---|
| 238 | return fTransTable[currentState][elementIndex]; |
---|
| 239 | } |
---|
| 240 | |
---|
| 241 | XERCES_CPP_NAMESPACE_END |
---|
| 242 | |
---|
| 243 | #endif |
---|
| 244 | |
---|