/*
-----------------------------------------------------------------------------
This source file is part of OGRE
(Object-oriented Graphics Rendering Engine)
For the latest info, see http://www.stevestreeting.com/ogre/

Copyright (c) 2000-2005 The OGRE Team
Also see acknowledgements in Readme.html

This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
Foundation; either version 2 of the License, or (at your option) any later
version.

This program is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.

You should have received a copy of the GNU General Public License along with
this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place - Suite 330, Boston, MA 02111-1307, USA, or go to
http://www.gnu.org/copyleft/gpl.html.
-----------------------------------------------------------------------------
*/
#include "OgreStableHeaders.h"
#include "OgreCompiler2Pass.h"
#include "OgreLogManager.h"
#include "OgreException.h"
#include "OgreStringConverter.h"

namespace Ogre {
    //-----------------------------------------------------------------------
    // instantiate static members
    Compiler2Pass::TokenState Compiler2Pass::mBNFTokenState;
    Compiler2Pass::TokenStateContainer Compiler2Pass::mClientTokenStates;
    //-----------------------------------------------------------------------
    Compiler2Pass::Compiler2Pass()
        : mActiveTokenState(&mBNFTokenState)
        , mSource(0)
        , mSourceName("system")
    {
        // reserve some memory space in the containers being used
        mBNFTokenState.tokenQue.reserve(100);
        mBNFTokenState.lexemeTokenDefinitions.reserve(50);

        initBNFCompiler();
    }
    //-----------------------------------------------------------------------
    void Compiler2Pass::initBNFCompiler(void)
    {
        if (mBNFTokenState.lexemeTokenDefinitions.empty())
        {
            addLexemeToken("UNKNOWN", BNF_UNKOWN);
            addLexemeToken("syntax", BNF_SYNTAX);
            addLexemeToken("rule", BNF_RULE);
            addLexemeToken("identifier", BNF_IDENTIFIER);
            addLexemeToken("identifier_right", BNF_IDENTIFIER_RIGHT);
            addLexemeToken("identifier_characters", BNF_IDENTIFIER_CHARACTERS);
            addLexemeToken("<", BNF_ID_BEGIN, false, true);
            addLexemeToken(">", BNF_ID_END, false, true);
            addLexemeToken("<#", BNF_CONSTANT_BEGIN, false, true);
            addLexemeToken("::=", BNF_SET_RULE, false, true);
            addLexemeToken("expression", BNF_EXPRESSION);
            addLexemeToken("and_term", BNF_AND_TERM);
            addLexemeToken("or_term", BNF_OR_TERM);
            addLexemeToken("term", BNF_TERM);
            addLexemeToken("term_id", BNF_TERM_ID);
            addLexemeToken("constant", BNF_CONSTANT);
            addLexemeToken("|", BNF_OR, false, true);
            addLexemeToken("terminal_symbol", BNF_TERMINAL_SYMBOL);
            addLexemeToken("terminal_start", BNF_TERMINAL_START);
            addLexemeToken("repeat_expression", BNF_REPEAT_EXPRESSION);
            addLexemeToken("not_expression", BNF_NOT_EXPRESSION);
            addLexemeToken("{", BNF_REPEAT_BEGIN, false, true);
            addLexemeToken("}", BNF_REPEAT_END, false, true);
            addLexemeToken("set", BNF_SET);
            addLexemeToken("(", BNF_SET_BEGIN, false, true);
            addLexemeToken(")", BNF_SET_END, false, true);
            addLexemeToken("set_end_exc", BNF_SET_END_EXC);
            addLexemeToken("optional_expression", BNF_OPTIONAL_EXPRESSION);
            addLexemeToken("[", BNF_OPTIONAL_BEGIN, false, true);
            addLexemeToken("]", BNF_OPTIONAL_END, false, true);
            addLexemeToken("not_test", BNF_NOT_TEST);
            addLexemeToken("not_chk", BNF_NOT_CHK);
            addLexemeToken("(?!", BNF_NOT_TEST_BEGIN, false, true);
            addLexemeToken("'", BNF_SINGLEQUOTE, false, true);
            addLexemeToken("-'", BNF_NO_TOKEN_START, false, true);
            addLexemeToken("any_character", BNF_ANY_CHARACTER);
            addLexemeToken("single_quote_exc", BNF_SINGLE_QUOTE_EXC);
            addLexemeToken("white_space_chk", BNF_WHITE_SPACE_CHK);
            addLexemeToken("special_characters1", BNF_SPECIAL_CHARACTERS1);
            addLexemeToken("special_characters2", BNF_SPECIAL_CHARACTERS2);

            addLexemeToken("letter", BNF_LETTER);
            addLexemeToken("letter_digit", BNF_LETTER_DIGIT);
            addLexemeToken("digit", BNF_DIGIT);
            addLexemeToken("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", BNF_ALPHA_SET, false, true);
            addLexemeToken("0123456789", BNF_NUMBER_SET, false, true);
            addLexemeToken("`~!@#$%^&*(-_=+\\|[]{}:;\"<>,.?/", BNF_SPECIAL_CHARACTER_SET2, false, true);
            addLexemeToken("$_", BNF_SPECIAL_CHARACTER_SET1, false, true);
            addLexemeToken(" ", BNF_WHITE_SPACE, false, true);
            addLexemeToken("?!", BNF_NOT_CHARS, false, true);
        }

        if (mBNFTokenState.rootRulePath.empty())
        {
            // first entry is set to unknown in order to trap rule IDs not set for non-terminal tokens
            mBNFTokenState.rootRulePath.resize(1);
            // used by bootstrap BNF text parser
            // <>  - non-terminal token
            // ()  - set of
            // ::= - rule definition
            #define _rule_(id)     mBNFTokenState.rootRulePath.push_back(TokenRule(otRULE, id));
            #define _is_(id)       mBNFTokenState.rootRulePath.push_back(TokenRule(otAND, id));
            // - blank space is an implied "AND" meaning the token is required
            #define _and_(id)      mBNFTokenState.rootRulePath.push_back(TokenRule(otAND, id));
            // | - or
            #define _or_(id)       mBNFTokenState.rootRulePath.push_back(TokenRule(otOR, id));
            // [] - optional
            #define _optional_(id) mBNFTokenState.rootRulePath.push_back(TokenRule(otOPTIONAL, id));
            // {} - repeat 0 or more times until fail or rule does not progress
            #define _repeat_(id)   mBNFTokenState.rootRulePath.push_back(TokenRule(otREPEAT, id));
            #define _data_(id)     mBNFTokenState.rootRulePath.push_back(TokenRule(otDATA, id));
            // (?! ) - lookahead negative (not test)
            #define _not_(id)      mBNFTokenState.rootRulePath.push_back(TokenRule(otNOT_TEST, id));
            #define _end_          mBNFTokenState.rootRulePath.push_back(TokenRule(otEND, 0));

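            // For example, the rule "<rule> ::= <identifier> '::=' <expression>" below
            // expands through these macros into a flat sequence of TokenRule entries
            // appended to rootRulePath:
            //   (otRULE, BNF_RULE) (otAND, BNF_IDENTIFIER) (otAND, BNF_SET_RULE)
            //   (otAND, BNF_EXPRESSION) (otEND, 0)
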
            // <syntax> ::= { rule }
            _rule_(BNF_SYNTAX) _repeat_(BNF_RULE) _end_

            // <rule> ::= <identifier> "::=" <expression>
            _rule_(BNF_RULE)
                _is_(BNF_IDENTIFIER)
                _and_(BNF_SET_RULE)
                _and_(BNF_EXPRESSION)
            _end_

            // <expression> ::= <and_term> { <or_term> }
            _rule_(BNF_EXPRESSION)
                _is_(BNF_AND_TERM)
                _repeat_(BNF_OR_TERM)
            _end_
            // <or_term> ::= "|" <and_term>
            _rule_(BNF_OR_TERM)
                _is_(BNF_OR)
                _and_(BNF_AND_TERM)
            _end_
            // <and_term> ::= <term> { <term> }
            _rule_(BNF_AND_TERM)
                _is_(BNF_TERM)
                _repeat_(BNF_TERM)
            _end_
            // <term> ::= <term_id> | <repeat_expression> | <optional_expression> | <not_expression>
            _rule_(BNF_TERM)
                _is_(BNF_TERM_ID)
                _or_(BNF_REPEAT_EXPRESSION)
                _or_(BNF_OPTIONAL_EXPRESSION)
                _or_(BNF_NOT_EXPRESSION)
            _end_

            // <term_id> ::= <constant> | <identifier_right> | <terminal_symbol> | <set>
            _rule_(BNF_TERM_ID)
                _is_(BNF_CONSTANT)
                _or_(BNF_IDENTIFIER_RIGHT)
                _or_(BNF_TERMINAL_SYMBOL)
                _or_(BNF_SET)
            _end_

            // <repeat_expression> ::= "{" <term_id> "}"
            _rule_(BNF_REPEAT_EXPRESSION)
                _is_(BNF_REPEAT_BEGIN)
                _and_(BNF_TERM_ID)
                _and_(BNF_REPEAT_END)
            _end_

            // <optional_expression> ::= "[" <term_id> "]"
            _rule_(BNF_OPTIONAL_EXPRESSION)
                _is_(BNF_OPTIONAL_BEGIN)
                _and_(BNF_TERM_ID)
                _and_(BNF_OPTIONAL_END)
            _end_

            // <not_expression> ::= "(?!" <term_id> ")"
            _rule_(BNF_NOT_EXPRESSION)
                _is_(BNF_NOT_TEST_BEGIN)
                _and_(BNF_TERM_ID)
                _and_(BNF_SET_END)
            _end_

            // <identifier_right> ::= <identifier> (?!"::=")
            _rule_(BNF_IDENTIFIER_RIGHT)
                _is_(BNF_IDENTIFIER)
                _not_(BNF_SET_RULE)
            _end_

            // <identifier> ::= "<" <letter> {<identifier_characters>} ">"
            _rule_(BNF_IDENTIFIER)
                _is_(BNF_ID_BEGIN)
                _and_(BNF_LETTER)
                _repeat_(BNF_IDENTIFIER_CHARACTERS)
                _and_(BNF_ID_END)
            _end_

            // <identifier_characters> ::= <letter_digit> | <special_characters1>
            _rule_(BNF_IDENTIFIER_CHARACTERS)
                _is_(BNF_LETTER_DIGIT)
                _or_(BNF_SPECIAL_CHARACTERS1)
            _end_

            // <terminal_symbol> ::= <terminal_start> @{ <any_character> } "'"
            _rule_(BNF_TERMINAL_SYMBOL)
                _is_(BNF_TERMINAL_START)
                _and_(_no_space_skip_)
                _repeat_(BNF_ANY_CHARACTER)
                _and_(BNF_SINGLEQUOTE)
            _end_

            // <terminal_start> ::= "-'" | "'"
            _rule_(BNF_TERMINAL_START)
                _is_(BNF_NO_TOKEN_START)
                _or_(BNF_SINGLEQUOTE)
            _end_

            // <constant> ::= "<#" <letter> {<identifier_characters>} ">"
            _rule_(BNF_CONSTANT)
                _is_(BNF_CONSTANT_BEGIN)
                _and_(BNF_LETTER)
                _repeat_(BNF_IDENTIFIER_CHARACTERS)
                _and_(BNF_ID_END)
            _end_

            // <set> ::= "(" (?!<not_chk>) @{<any_character>} ")"
            _rule_(BNF_SET)
                _is_(BNF_SET_BEGIN)
                _not_(BNF_NOT_CHK)
                _and_(_no_space_skip_)
                _repeat_(BNF_ANY_CHARACTER)
                _and_(BNF_SET_END)
            _end_

            // <any_character> ::= <letter_digit> | <special_characters2>
            _rule_(BNF_ANY_CHARACTER)
                _is_(BNF_LETTER_DIGIT)
                _or_(BNF_SPECIAL_CHARACTERS2)
            _end_

            // <letter_digit> ::= <letter> | <digit>
            _rule_(BNF_LETTER_DIGIT)
                _is_(BNF_LETTER)
                _or_(BNF_DIGIT)
            _end_

            // <letter> ::= (abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ)
            _rule_(BNF_LETTER)
                _is_(_character_)
                _data_(BNF_ALPHA_SET) // "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
            _end_

            // <digit> ::= (0123456789)
            _rule_(BNF_DIGIT)
                _is_(_character_)
                _data_(BNF_NUMBER_SET)
            _end_

            // <special_characters1> ::= ($_)
            _rule_(BNF_SPECIAL_CHARACTERS1)
                _is_(_character_)
                _data_(BNF_SPECIAL_CHARACTER_SET1)
            _end_

            // <special_characters2> ::= (`~!@#$%^&*(-_=+\|[]{}:;"<>,.?/) | <single_quote_exc>
            //                           | <white_space_chk> | <set_end_exc>
            _rule_(BNF_SPECIAL_CHARACTERS2)
                _is_(_character_)
                _data_(BNF_SPECIAL_CHARACTER_SET2)
                _or_(BNF_WHITE_SPACE_CHK)
                _or_(BNF_SINGLE_QUOTE_EXC)
                _or_(BNF_SET_END_EXC)
            _end_

            // <single_quote_exc> ::= "'" (?!" ")
            _rule_(BNF_SINGLE_QUOTE_EXC)
                _is_(_character_)
                _data_(BNF_SINGLEQUOTE)
                _not_(BNF_WHITE_SPACE_CHK)
            _end_

            // <set_end_exc> ::= ")" (?!" ")
            _rule_(BNF_SET_END_EXC)
                _is_(_character_)
                _data_(BNF_SET_END)
                _not_(BNF_WHITE_SPACE_CHK)
            _end_

            // <white_space_chk> ::= ( )
            _rule_(BNF_WHITE_SPACE_CHK)
                _is_(_character_)
                _data_(BNF_WHITE_SPACE)
            _end_
            // <not_chk> ::= (?!)
            _rule_(BNF_NOT_CHK)
                _is_(_character_)
                _data_(BNF_NOT_CHARS)
            _end_

            // now that all the rules are added, update token definitions with rule links
            verifyTokenRuleLinks("system");
        }
        // switch to client state
        mActiveTokenState = mClientTokenState;
    }

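    // The hand-built rule paths above form the bootstrap grammar: in
    // setClientBNFGrammer() below, the client's BNF text is parsed with this
    // grammar (pass 1), and buildClientBNFRulePaths() then converts the
    // resulting token que into the client's own rule paths.
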
    //-----------------------------------------------------------------------
    void Compiler2Pass::verifyTokenRuleLinks(const String& grammerName)
    {
        size_t token_ID;

        // scan through all the rules and initialize index to rules for non-terminal tokens
        const size_t ruleCount = mActiveTokenState->rootRulePath.size();
        for (size_t i = 0; i < ruleCount; ++i)
        {
            // make sure token definition holds valid token
            if (mActiveTokenState->rootRulePath[i].operation == otRULE)
            {
                token_ID = mActiveTokenState->rootRulePath[i].tokenID;
                // system token IDs can never have a rule assigned to them so no need to check if token is a system token
                // but do make sure the ID is within defined bounds
                if (token_ID >= mActiveTokenState->lexemeTokenDefinitions.size())
                    OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "For grammar: " + grammerName +
                        ", a token ID was out of token definition range.",
                        "Compiler2Pass::verifyTokenRuleLinks");

                LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions[token_ID];
                if (tokenDef.ID != token_ID)
                    OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "For grammar: " + grammerName +
                        ", lexeme non-terminal token definition: " +
                        tokenDef.lexeme + " is corrupted and does not match its assigned rule.",
                        "Compiler2Pass::verifyTokenRuleLinks");
                // if operation is a rule then update token definition
                tokenDef.ruleID = i;
                tokenDef.isNonTerminal = true;
            }
        } // end for

        // test all non-terminals for valid rule ID
        const size_t definitionCount = mActiveTokenState->lexemeTokenDefinitions.size();
        bool errorsFound = false;
        // report all non-terminals that don't have a rule then throw an exception
        for (token_ID = 0; token_ID < definitionCount; ++token_ID)
        {
            const LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions[token_ID];
            if (tokenDef.isNonTerminal && (tokenDef.ruleID == 0))
            {
                errorsFound = true;
                LogManager::getSingleton().logMessage(
                    "For grammar: " + grammerName +
                    ", lexeme non-terminal token definition: " + tokenDef.lexeme +
                    " found with no rule definition or corrupted."
                    );
            }
        }
        if (errorsFound)
        {
            OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "For grammar: " + grammerName +
                ", lexeme non-terminal token definition(s) found with no rule definition or corrupted.",
                "Compiler2Pass::verifyTokenRuleLinks");
        }
    }

    //-----------------------------------------------------------------------
    bool Compiler2Pass::compile(const String& source, const String& sourceName)
    {
        // make sure BNF compiler is set up to compile BNF grammar if required
        initBNFCompiler();
        // compile the client's BNF grammar
        setClientBNFGrammer();

        bool Passed = false;

        mSource = &source;
        mSourceName = sourceName;
        mActiveTokenState = mClientTokenState;
        // start compiling if there is a rule base to work with
        if (mActiveTokenState->rootRulePath.size() > 1)
        {
            Passed = doPass1();

            if (Passed)
            {
                Passed = doPass2();
            }
        }

        return Passed;
    }

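    // Illustrative use (a sketch, not part of this file): a concrete script
    // compiler derives from Compiler2Pass, implements the virtual hooks this
    // file calls (setupTokenDefinitions, getClientBNFGrammer,
    // getClientGrammerName, executeTokenAction, ...), and then simply calls
    // compile() with the script text and a name used in error reports, e.g.:
    //   MyScriptCompiler compiler;                 // hypothetical subclass
    //   bool ok = compiler.compile(scriptText, "example.script");
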
    //-----------------------------------------------------------------------
    bool Compiler2Pass::doPass1()
    {
        // scan through Source string and build a token list using TokenInstructions
        // this is a simple brute force lexical scanner/analyzer that also parses the formed
        // token for proper semantics and context in one pass

        mCurrentLine = 1;
        mCharPos = 0;
        // reset position in Constants container
        mConstants.clear();
        mLabels.clear();
        // there is no active label when first starting pass 1
        mLabelIsActive = false;
        mActiveLabelKey = 0;
        mEndOfSource = mSource->length();

        // start with a clean slate
        mActiveTokenState->tokenQue.clear();
        mPass2TokenQuePosition = 0;
        mPreviousActionQuePosition = 0;
        mNoTerminalToken = false;
        mNoSpaceSkip = false;
        // tokenize and check semantics until an error occurs or end of source is reached
        // assume RootRulePath has pointer to rules so start at index + 1 for first rule path
        // first rule token would be a rule definition so skip over it
        bool passed = false;

        try
        {
            passed = processRulePath(1);
            // if a lexeme in source still exists then the end of source was not reached and there was a problem somewhere
            if (positionToNextLexeme()) passed = false;
            if (passed)
            {
                // special condition at end of script. The last action needs to be triggered if
                // parsing reached the end of the source.
                activatePreviousTokenAction();
            }

        }
        catch (Exception& e)
        {
            LogManager::getSingleton().logMessage( "Exception caught: "
                + e.getFullDescription()
                + ", while trying to parse: "
                + getClientGrammerName()
                + ": "
                + mSourceName
                );
        }
        catch (...)
        {
            LogManager::getSingleton().logMessage( "Unknown exception while trying to parse: "
                + getClientGrammerName()
                + ": "
                + mSourceName
                );
        }

        return passed;
    }

    //-----------------------------------------------------------------------
    bool Compiler2Pass::doPass2()
    {
        bool passed = true;
        // step through tokens container and execute until end found or error occurs

        return passed;
    }

    //-----------------------------------------------------------------------
    const Compiler2Pass::TokenInst& Compiler2Pass::getNextToken(const size_t expectedTokenID)
    {
        //static TokenInst badToken;
        // advance instruction que index by one then get the current token instruction
        if (mPass2TokenQuePosition < mActiveTokenState->tokenQue.size() - 1)
        {
            ++mPass2TokenQuePosition;
            const TokenInst& tokenInst = mActiveTokenState->tokenQue[mPass2TokenQuePosition];
            if (expectedTokenID > 0 && (tokenInst.tokenID != expectedTokenID))
            {
                OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, getClientGrammerName() + ":" + mSourceName
                    + ", expected token ID not found",
                    "Compiler2Pass::getNextToken");
            }

            return tokenInst;
        }
        else
            // no more tokens left for pass 2 processing
            OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, getClientGrammerName() + ":" + mSourceName
                + ", no more tokens available for pass 2 processing",
                "Compiler2Pass::getNextToken");
    }
    //-----------------------------------------------------------------------
    const Compiler2Pass::TokenInst& Compiler2Pass::getCurrentToken(void)
    {
        if (mPass2TokenQuePosition < mActiveTokenState->tokenQue.size() - 1)
            return mActiveTokenState->tokenQue[mPass2TokenQuePosition];
        else
            OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, getClientGrammerName() + ":" + mSourceName
                + ", no token available, all pass 2 tokens processed",
                "Compiler2Pass::getCurrentToken");
    }
    //-----------------------------------------------------------------------
    bool Compiler2Pass::testNextTokenID(const size_t expectedTokenID)
    {
        bool passed = false;
        const size_t nextTokenIndex = mPass2TokenQuePosition + 1;
        if (nextTokenIndex < mActiveTokenState->tokenQue.size() - 1)
            passed = mActiveTokenState->tokenQue[nextTokenIndex].tokenID == expectedTokenID;

        return passed;
    }
    //-----------------------------------------------------------------------
    void Compiler2Pass::replaceToken(void)
    {
        // move instruction que index back one position
        if (mPass2TokenQuePosition > 0)
            --mPass2TokenQuePosition;
    }
    //-----------------------------------------------------------------------
    float Compiler2Pass::getNextTokenValue(void)
    {
        // get float value from current token instruction
        if (getNextToken().tokenID == _value_)
            return mConstants[mPass2TokenQuePosition];
        else
        {
            const TokenInst& token = getCurrentToken();
            // if token is not for a value then throw an exception
            OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName +
                ", Line " + StringConverter::toString(token.line) +
                ", token is not for a value. Found: >>>" + mSource->substr(token.pos, 20) +
                "<<<",
                "Compiler2Pass::getNextTokenValue");
        }
    }
    //-----------------------------------------------------------------------
    const String& Compiler2Pass::getNextTokenLabel(void)
    {
        // get label from current token instruction
        if (getNextToken().tokenID == _character_)
            return mLabels[mPass2TokenQuePosition];
        else
        {
            const TokenInst& token = getCurrentToken();
            // if token is not for a label then throw an exception
            OGRE_EXCEPT(Exception::ERR_ITEM_NOT_FOUND, "In " + mSourceName +
                ", Line " + StringConverter::toString(token.line) +
                ", token is not for a label. Found: >>>" + mSource->substr(token.pos, 20) +
                "<<<",
                "Compiler2Pass::getNextTokenLabel");
        }
    }
    //-----------------------------------------------------------------------
    size_t Compiler2Pass::getPass2TokenQueCount(void) const
    {
        // calculate number of tokens between current token instruction and next token with action
        if (mActiveTokenState->tokenQue.size() > mPass2TokenQuePosition)
            return mActiveTokenState->tokenQue.size() - 1 - mPass2TokenQuePosition;
        else
            return 0;
    }
    //-----------------------------------------------------------------------
    size_t Compiler2Pass::getRemainingTokensForAction(void) const
    {
        size_t remainingTokens = getPass2TokenQueCount();
        // don't count token for next action
        if (remainingTokens > 0)
            --remainingTokens;
        return remainingTokens;
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::setClientBNFGrammer(void)
    {
        // switch to internal BNF containers
        // clear client containers
        mClientTokenState = &mClientTokenStates[getClientGrammerName()];
        // attempt to compile the grammar into a rule base if no rules exist
        if (mClientTokenState->rootRulePath.size() == 0)
        {
            mClientTokenState->tokenQue.reserve(100);
            mClientTokenState->lexemeTokenDefinitions.reserve(100);
            // first entry in rule path is set as a bad entry and no token should reference it
            mClientTokenState->rootRulePath.resize(1);
            // allow the client to set up token definitions prior to
            // compiling the BNF grammar
            // ensure token definitions are added to the client state
            mActiveTokenState = mClientTokenState;
            // get client to set up token definitions and actions it wants to know about
            setupTokenDefinitions();
            // make sure active token state is for BNF compiling
            mActiveTokenState = &mBNFTokenState;
            mSource = &getClientBNFGrammer();

            if (doPass1())
            {
                buildClientBNFRulePaths();
            }
            else
            {
                OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "BNF Grammar compilation failed for " +
                    getClientGrammerName(), "Compiler2Pass::setClientBNFGrammer");
            }
            // change token state to client data after compiling grammar
            mActiveTokenState = mClientTokenState;
            // verify the client rule paths and associated terminal and non-terminal lexemes
            verifyTokenRuleLinks(getClientGrammerName());
        }
    }

    //-----------------------------------------------------------------------
    bool Compiler2Pass::processRulePath(size_t rulepathIDX)
    {
        // rule path determines what tokens and therefore what lexemes are acceptable from the source
        // it is assumed that the tokens with the longest similar lexemes are arranged first so
        // if a match is found it is accepted and no further searching is done

        if (rulepathIDX >= mActiveTokenState->rootRulePath.size())
        {
            // this is very bad and there is no way to recover so raise exception
            OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "rule ID exceeds rule base bounds.", "Compiler2Pass::processRulePath");
        }
        // record position of last token in container
        // to be used as the rollback position if a valid token is not found
        const size_t TokenContainerOldSize = mActiveTokenState->tokenQue.size();
        const size_t OldCharPos = mCharPos;
        const size_t OldLinePos = mCurrentLine;
        const bool OldLabelIsActive = mLabelIsActive;
        const size_t OldActiveLabelKey = mActiveLabelKey;
        const String OldLabel = mLabels[OldActiveLabelKey];

        // keep track of what non-terminal token activated the rule
        size_t ActiveNTTRule = mActiveTokenState->rootRulePath[rulepathIDX].tokenID;
        // start rule path at next position for definition
        ++rulepathIDX;

        // assume the rule will pass
        bool passed = true;
        bool tokenFound = false;
        bool endFound = false;
        bool parseErrorLogged = false;

        // keep following rulepath until the end is reached
        while (!endFound)
        {
            switch (mActiveTokenState->rootRulePath[rulepathIDX].operation)
            {

            case otAND:
                // only validate if the previous rule passed
                if (passed)
                    passed = ValidateToken(rulepathIDX, ActiveNTTRule);
                // log error message if a previous token was found in this rule path and the current token failed
                if (tokenFound && !parseErrorLogged && !passed)
                {
                    parseErrorLogged = true;
                    LogManager::getSingleton().logMessage(
                        "*** ERROR in : " + getClientGrammerName() +
                        "\nSource: " + mSourceName +
                        "\nUnknown token found, was expecting: " + getBNFGrammerTextFromRulePath(rulepathIDX)
                        );
                    LogManager::getSingleton().logMessage(
                        " Found: >>>" + mSource->substr(mCharPos, 20) +
                        "<<<, while in rule path: <" + mActiveTokenState->lexemeTokenDefinitions[ActiveNTTRule].lexeme
                        + ">"
                        );
                }

                break;

            case otOR:
                // only validate if the previous rule failed
                if (passed == false)
                {
                    // clear previous tokens from entry and try again
                    mActiveTokenState->tokenQue.resize(TokenContainerOldSize);
                    passed = ValidateToken(rulepathIDX, ActiveNTTRule);
                }
                else
                {
                    // path passed up to this point therefore finished so pretend end marker found
                    endFound = true;
                }
                break;

            case otOPTIONAL:
                // if previous passed then try this rule but it does not affect the success of the rule since it's optional
                if (passed) ValidateToken(rulepathIDX, ActiveNTTRule);
                break;

            case otREPEAT:
                // repeat until called rule fails or cursor does not advance
                // repeat is 0 or more times
                if (passed)
                {
                    // keep calling until failure or no change in cursor position
                    size_t prevPos = mCharPos;
                    while (ValidateToken(rulepathIDX, ActiveNTTRule))
                    {
                        if (mCharPos > prevPos)
                        {
                            prevPos = mCharPos;
                        }
                        else
                        {
                            // repeat failed to advance the cursor position so time to quit since the repeating rule
                            // path isn't finding anything
                            // this can happen if the rule being called only has _optional_ rules
                            // this checking of the cursor positions prevents an infinite loop from occurring
                            break;
                        }
                    }
                }
                break;

            case otDATA:
                // skip it, should have been handled by previous operation.
                break;

            case otNOT_TEST:
                // only validate if the previous rule passed
                if (passed)
                {

                    // perform look ahead and test if rule production fails
                    const size_t la_TokenContainerOldSize = mActiveTokenState->tokenQue.size();
                    const size_t la_OldCharPos = mCharPos;
                    const size_t la_OldLinePos = mCurrentLine;
                    const bool la_OldLabelIsActive = mLabelIsActive;
                    const size_t la_OldActiveLabelKey = mActiveLabelKey;
                    const String la_OldLabel = mLabels[la_OldActiveLabelKey];

                    passed = !ValidateToken(rulepathIDX, ActiveNTTRule);

                    // only wanted to take a peek as to what was ahead so now restore back to current position
                    mActiveTokenState->tokenQue.resize(la_TokenContainerOldSize);
                    mCharPos = la_OldCharPos;
                    mCurrentLine = la_OldLinePos;
                    // restore label state if it was active before the not test
                    if (la_OldLabelIsActive)
                    {
                        mActiveLabelKey = la_OldActiveLabelKey;
                        mLabels[la_OldActiveLabelKey] = la_OldLabel;
                        mLabelIsActive = la_OldLabelIsActive;
                    }
                    // only perform full rollback if tokens found
                    if (!passed)
                    {
                        // the rule did not validate so get rid of tokens decoded
                        // roll back the token container end position to what it was when the rule started
                        // this will get rid of all tokens that had been pushed on the container while
                        // trying to validate this rule
                        mActiveTokenState->tokenQue.resize(TokenContainerOldSize);
                        //mConstants.resize(OldConstantsSize);
                        mCharPos = OldCharPos;
                        mCurrentLine = OldLinePos;
                        // restore label state if it was active before the not test
                        if (OldLabelIsActive)
                        {
                            mActiveLabelKey = OldActiveLabelKey;
                            mLabels[OldActiveLabelKey] = OldLabel;
                            mLabelIsActive = OldLabelIsActive;
                        }

                        // terminate rule production processing
                        endFound = true;
                    }
                }
                break;

            case otEND:
                // end of rule found so time to return
                endFound = true;
                // only rollback if no tokens found
                if (!passed && !tokenFound)
                {
                    // the rule did not validate so get rid of tokens decoded
                    // roll back the token container end position to what it was when the rule started
                    // this will get rid of all tokens that had been pushed on the container while
                    // trying to validate this rule
                    mActiveTokenState->tokenQue.resize(TokenContainerOldSize);
                    //mConstants.resize(OldConstantsSize);
                    mCharPos = OldCharPos;
                    mCurrentLine = OldLinePos;
                }
                else
                {
                    // if the rule path was partially completed, one or more tokens found, then put a
                    // warning in the log
                    if (!passed && tokenFound && !mLabelIsActive)
                    {
                        passed = true;
                        // log last valid token found
                        const TokenInst& tokenInst = mActiveTokenState->tokenQue[mActiveTokenState->tokenQue.size() - 1];
                        LogManager::getSingleton().logMessage(
                            "Last valid token found was at line: " + StringConverter::toString(tokenInst.line));
                        LogManager::getSingleton().logMessage(
                            "source hint: >>>" + mSource->substr(tokenInst.pos, 20) + "<<<");
                    }
                }
                break;

            default:
                // an exception should be raised since the code should never get here
                passed = false;
                endFound = true;
                break;

            } // end switch
            // prevent rollback from occurring if a token was found but a later part of the rule fails
            // this allows pass 2 to either fix the problem or report the error and continue on.
            // Don't do this for _no_token_ since it's a special system token and has nothing to do with
            // a successful parse of the source. Can check this by looking at mNoTerminalToken state.
            // if _no_token_ had just been validated then mNoTerminalToken will be true.
            if (passed && !mNoTerminalToken)
                tokenFound = true;
            // move on to the next rule in the path
            ++rulepathIDX;
        } // end while

        return passed;
    }

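    // Worked example of the interpreter above: evaluating the bootstrap rule
    // "<rule> ::= <identifier> '::=' <expression>" walks the flat TokenRule
    // sequence (otAND, BNF_IDENTIFIER)(otAND, BNF_SET_RULE)(otAND, BNF_EXPRESSION)
    // (otEND, 0): each otAND entry calls ValidateToken, which either matches a
    // terminal lexeme in the source or recurses into processRulePath for a
    // non-terminal, and otEND decides whether to keep or roll back the tokens
    // collected for this rule.
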
    //-----------------------------------------------------------------------
    bool Compiler2Pass::isCharacterLabel(const size_t rulepathIDX)
    {
        // assume the test is going to fail
        bool Passed = false;

        // get token from next rule operation
        // the token string is a list of valid single characters
        // compare character at current cursor position in script to characters in list for a match
        // if match found then add character to active label
        // _character_ will not have a token definition but the next rule operation should be
        // DATA and have the token ID required to get the character set.
        const TokenRule& rule = mActiveTokenState->rootRulePath[rulepathIDX + 1];
        if (rule.operation == otDATA)
        {
            const size_t TokenID = rule.tokenID;
            if (mActiveTokenState->lexemeTokenDefinitions[TokenID].lexeme.find((*mSource)[mCharPos]) != String::npos)
            {
                // is a new label starting?
                // if mLabelIsActive is false then starting a new label so need a new mActiveLabelKey
                if (!mLabelIsActive)
                {
                    // mActiveLabelKey will be the end of the instruction container ie the size of mTokenInstructions
                    mActiveLabelKey = mActiveTokenState->tokenQue.size();
                    mLabelIsActive = true;
                    mNoSpaceSkip = true;
                    // reset the contents of the label since it might have been used prior to a rollback
                    mLabels[mActiveLabelKey] = "";
                }
                // add the single character to the end of the active label
                mLabels[mActiveLabelKey] += (*mSource)[mCharPos];
                Passed = true;
            }
        }

        return Passed;
    }
    //-----------------------------------------------------------------------
    bool Compiler2Pass::ValidateToken(const size_t rulepathIDX, const size_t activeRuleID)
    {
        size_t tokenlength = 0;
        // assume the test is going to fail
        bool Passed = false;
        size_t tokenID = mActiveTokenState->rootRulePath[rulepathIDX].tokenID;
        // if terminal token then compare text of lexeme with what is in source
        if ((tokenID >= SystemTokenBase) ||
            !mActiveTokenState->lexemeTokenDefinitions[tokenID].isNonTerminal)
        {
            if (tokenID != _character_)
            {
                mLabelIsActive = false;
                // allow spaces to be skipped for next lexeme processing
                mNoSpaceSkip = false;
            }

            if (tokenID == _no_space_skip_)
            {
                // don't skip spaces to get to next lexeme
                mNoSpaceSkip = true;
                // move on to next rule
                Passed = true;
            }
            else if (tokenID == _no_token_)
            {
                // turn on no terminal token processing for next rule
                mNoTerminalToken = true;
                // move on to next rule
                Passed = true;
            }
            // if label processing is active ie previous token was _character_
            // and current token is supposed to be a _character_ then don't
            // position to next lexeme in source
            else if (mNoSpaceSkip || positionToNextLexeme())
            {
                // if token is supposed to be a number then check if it's a numerical constant
                if (tokenID == _value_)
                {
                    float constantvalue = 0.0f;
                    if ((Passed = isFloatValue(constantvalue, tokenlength)))
                    {
                        // key is the next instruction index
                        mConstants[mActiveTokenState->tokenQue.size()] = constantvalue;
                    }
                }
                else // check if user label or valid keyword token
                {
                    if (tokenID == _character_)
                    {
                        if ((Passed = isCharacterLabel(rulepathIDX)))
                            // only one character was processed
                            tokenlength = 1;
                    }
                    else
                    {

                        // compare token lexeme text with source text
                        if ((Passed = isLexemeMatch(mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme, mActiveTokenState->lexemeTokenDefinitions[tokenID].isCaseSensitive)))
                        {
                            tokenlength = mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme.length();
                            // check if terminal token should be ignored ie not put in instruction que
                            if (mNoTerminalToken)
                                tokenID = _no_token_;
                        }
                        // always clear no terminal token flag. it only works for one pending terminal token.
                        mNoTerminalToken = false;
                    }
                }

                // only generate one token instruction per label:
                // turn off generation of a new token instruction if this is not
                // the first _character_ in a sequence of _character_ terminal tokens.
                // Only want one _character_ token which identifies a label
                if (tokenID == _character_)
                {
                    if (mActiveTokenState->tokenQue.size() > mActiveLabelKey)
                    {
                        // this token is not the first _character_ in the label sequence
                        // so turn off the token by turning tokenID into _no_token_
                        tokenID = _no_token_;
                    }
                }

                // if valid terminal token was found then add it to the instruction container for pass 2 processing
                if (Passed)
                {
                    if (tokenID != _no_token_)
                    {
                        TokenInst newtoken;
                        // push token onto end of container
                        newtoken.tokenID = tokenID;
                        newtoken.NTTRuleID = activeRuleID;
                        newtoken.line = mCurrentLine;
                        newtoken.pos = mCharPos;
                        newtoken.found = true;

                        mActiveTokenState->tokenQue.push_back(newtoken);
                        // token action processing
                        // if the token has an action then fire previous token action
                        checkTokenActionTrigger();
                    }

                    // update source position
                    mCharPos += tokenlength;
                } // end if
            } // end else if

        }
        // else a non-terminal token was found
        else
        {
            // execute rule for non-terminal
            // get rule ID for index into rulepath to be called
            Passed = processRulePath(mActiveTokenState->lexemeTokenDefinitions[tokenID].ruleID);
        }

        return Passed;
    }

    //-----------------------------------------------------------------------
    bool Compiler2Pass::isFloatValue(float& fvalue, size_t& charsize) const
    {
        // check to see if it is a numeric float value
        bool valuefound = false;

        const char* startptr = mSource->c_str() + mCharPos;
        char* endptr = NULL;

        fvalue = (float)strtod(startptr, &endptr);
        // if a valid float was found then endptr will have the pointer to the first invalid character
        if (endptr)
        {
            if (endptr > startptr)
            {
                // a valid value was found so process it
                charsize = endptr - startptr;
                valuefound = true;
            }
        }

        return valuefound;
    }

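    // Note: because the scan above relies on strtod, all standard C float
    // forms are accepted, e.g. "42", "-1.5", ".25" or "3e-2"; charsize is the
    // number of source characters consumed so the scanner can advance past
    // the literal.
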
    //-----------------------------------------------------------------------
    bool Compiler2Pass::isLexemeMatch(const String& lexeme, const bool caseSensitive) const
    {
        // compare text at source + charpos with the lexeme: limit testing to lexeme size
        if (!caseSensitive)
        {
            String testItem = mSource->substr(mCharPos, lexeme.length());
            StringUtil::toLowerCase(testItem);
            return (testItem.compare(lexeme) == 0);
        }
        else
        {
            return (mSource->compare(mCharPos, lexeme.length(), lexeme) == 0);
        }
    }

    //-----------------------------------------------------------------------
    bool Compiler2Pass::positionToNextLexeme()
    {
        bool validlexemefound = false;
        bool endofsource = mCharPos >= mEndOfSource;

        while (!validlexemefound && !endofsource)
        {
            skipWhiteSpace();
            skipEOL();
            skipComments();
            // have we reached the end of the string?
            if (mCharPos >= mEndOfSource)
                endofsource = true;
            else
            {
                // if ASCII > space then assume a valid character is found
                if ((*mSource)[mCharPos] > ' ') validlexemefound = true;
            }
        } // end of while

        return validlexemefound;
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::skipComments()
    {
        if (mCharPos >= mEndOfSource)
            return;
        // if current char and next are // then search for EOL
        if (mSource->compare(mCharPos, 2, "//") == 0)
            findEOL();
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::findEOL()
    {
        if (mCharPos >= mEndOfSource)
            return;
        // find the EOL character and move to this position
        mCharPos = mSource->find('\n', mCharPos);
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::skipEOL()
    {
        if (mCharPos >= mEndOfSource)
            return;

        if (((*mSource)[mCharPos] == '\n') || ((*mSource)[mCharPos] == '\r'))
        {
            mCurrentLine++;
            mCharPos++;
            if (mCharPos >= mEndOfSource)
                return;
            // consume the second character of a two-character line ending
            if (((*mSource)[mCharPos] == '\n') || ((*mSource)[mCharPos] == '\r'))
            {
                mCharPos++;
            }
        }
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::skipWhiteSpace()
    {
        if (mCharPos >= mEndOfSource)
            return;

        mCharPos = mSource->find_first_not_of(" \t", mCharPos);
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::addLexemeToken(const String& lexeme, const size_t token, const bool hasAction, const bool caseSensitive)
    {
        if (token >= mActiveTokenState->lexemeTokenDefinitions.size())
            mActiveTokenState->lexemeTokenDefinitions.resize(token + 1);
        // since resizing guarantees the token definition will exist, just assign values to members
        LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions[token];
        if (tokenDef.ID != 0)
        {
            OGRE_EXCEPT(Exception::ERR_DUPLICATE_ITEM, "In " + getClientGrammerName() +
                ", lexeme >>>" +
                lexeme + "<<< already exists in lexeme token definitions",
                "Compiler2Pass::addLexemeToken");
        }
        tokenDef.ID = token;
        tokenDef.lexeme = lexeme;
        if (!caseSensitive)
            StringUtil::toLowerCase(tokenDef.lexeme);
        tokenDef.hasAction = hasAction;
        tokenDef.isCaseSensitive = caseSensitive;

        mActiveTokenState->lexemeTokenMap[lexeme] = token;
    }

    //-----------------------------------------------------------------------
    void Compiler2Pass::checkTokenActionTrigger(void)
    {
        size_t lastTokenQuePos = mActiveTokenState->tokenQue.size();
        // if there are no token instructions in the que then there is nothing to do
        if (lastTokenQuePos == 0)
            return;

        --lastTokenQuePos;
        // if the last token position and the previous action position are the same then do nothing
        if (lastTokenQuePos == mPreviousActionQuePosition)
            return;

        const size_t lastTokenID = mActiveTokenState->tokenQue.at(lastTokenQuePos).tokenID;
        // don't check actions for system token IDs since they are not in lexemeTokenDefinitions
        if (lastTokenID >= SystemTokenBase)
            return;

        // check action trigger if last token has an action
        if (mActiveTokenState->lexemeTokenDefinitions.at(lastTokenID).hasAction)
        {
            // only activate the action belonging to the token found previously
            activatePreviousTokenAction();
            // current token action now becomes the previous one
            mPreviousActionQuePosition = lastTokenQuePos;
        }
    }

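    // Action timing example: with a token stream A B C D where A and D carry
    // actions, pushing D during pass 1 makes checkTokenActionTrigger fire A's
    // action first (with mPass2TokenQuePosition rewound to A's position so the
    // action can consume B and C via the getNextToken* accessors); D then
    // becomes the pending "previous" action, fired either by the next action
    // token or by the end-of-source call in doPass1.
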
    //-----------------------------------------------------------------------
    String Compiler2Pass::getBNFGrammerTextFromRulePath(size_t ruleID)
    {

        String grammerText;

        // default to using client rule path
        // check if index is in bounds
        if (ruleID >= mActiveTokenState->rootRulePath.size())
        {
            OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "rule ID exceeds client rule path bounds.", "Compiler2Pass::getBNFGrammerTextFromRulePath");
        }
        // iterate through rule path and get terminal and non-terminal strings
        const TokenRuleContainer& rulePath = mActiveTokenState->rootRulePath;

        while (rulePath[ruleID].operation != otEND)
        {
            // rule text processing - the op instructions, system tokens
            switch (rulePath[ruleID].operation)
            {
            // rule lexeme ::=
            case otRULE:
                grammerText += "\n" + getLexemeText(ruleID) + " ::=";
                break;
            // no special processing for AND op
            case otAND:
                grammerText += " " + getLexemeText(ruleID);
                break;
            // or | lexeme
            case otOR:
                grammerText += " | " + getLexemeText(ruleID);
                break;
            // optional [lexeme]
            case otOPTIONAL:
                grammerText += " [" + getLexemeText(ruleID) + "]";
                break;
            // repeat {lexeme}
            case otREPEAT:
                grammerText += " {" + getLexemeText(ruleID) + "}";
                break;
            // not test (?!lexeme)
            case otNOT_TEST:
                grammerText += " (?!" + getLexemeText(ruleID) + ")";
                break;
            }
            // lexeme/token text processing
            ++ruleID;
        }

        return grammerText;
    }

| 1201 | //-----------------------------------------------------------------------
|
---|
| 1202 |
|
---|
| 1203 | //-----------------------------------------------------------------------
|
---|
| 1204 | // Private Methods
|
---|
| 1205 | //-----------------------------------------------------------------------
|
---|
| 1206 | //-----------------------------------------------------------------------
|
---|
String Compiler2Pass::getLexemeText(size_t& ruleID)
{
    if (ruleID >= mActiveTokenState->rootRulePath.size())
    {
        OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR,
            "rule ID exceeds client rule path bounds.", "Compiler2Pass::getLexemeText"
        );
    }

    String lexeme;

    const TokenRuleContainer& rulePath = mActiveTokenState->rootRulePath;
    const size_t tokenID = rulePath[ruleID].tokenID;

    if (tokenID < SystemTokenBase)
    {
        // non-terminal tokens
        if (mActiveTokenState->lexemeTokenDefinitions[tokenID].isNonTerminal)
        {
            lexeme = "<" + mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme + ">";
        }
        else // terminal tokens
        {
            lexeme = "'" + mActiveTokenState->lexemeTokenDefinitions[tokenID].lexeme + "'";
        }
    }
    else // system token processing
    {
        switch (rulePath[ruleID].tokenID)
        {
        case _character_:
            // need to get next rule instruction for data
            ++ruleID;
            // data for _character_ is always a set so put () around text string
            lexeme = "(" + mActiveTokenState->lexemeTokenDefinitions[rulePath[ruleID].tokenID].lexeme + ")";
            break;
        case _value_:
            // <#> - need name of label?
            lexeme = "<#>";
            break;
        }
    }

    return lexeme;
}
//-----------------------------------------------------------------------
void Compiler2Pass::activatePreviousTokenAction(void)
{
    const size_t previousTokenID = mActiveTokenState->tokenQue.at(mPreviousActionQuePosition).tokenID;
    const LexemeTokenDef& tokenDef = mActiveTokenState->lexemeTokenDefinitions.at(previousTokenID);
    if (tokenDef.hasAction)
    {
        // set the current pass 2 token que position to previous action que position
        // assume that pass 2 processing will use tokens downstream
        mPass2TokenQuePosition = mPreviousActionQuePosition;
        executeTokenAction(previousTokenID);
    }
}
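// Editorial note on the intent, as read from the code above: token actions
// fire one token "late" - when a new action token is queued, the action
// belonging to the token at mPreviousActionQuePosition is executed first,
// with mPass2TokenQuePosition rewound to that position so the action can
// consume the tokens that were queued after it.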
//-----------------------------------------------------------------------
void Compiler2Pass::buildClientBNFRulePaths(void)
{
    bool isFirstToken = true;
    OperationType pendingRuleOp = otAND;

    // convert tokens in BNF token que to rule paths
    while (getPass2TokenQueCount() > 0)
    {
        // get a pass 2 token
        // if this is the first time getting a token then get the current token
        const TokenInst& currentToken = isFirstToken ? getCurrentToken() : getNextToken();
        isFirstToken = false;
        // only process the token if it's valid
        if (currentToken.found)
        {
            // a valid token has been found, convert to a rule
            switch (currentToken.tokenID)
            {
            case BNF_ID_BEGIN: // <
                extractNonTerminal(pendingRuleOp);
                pendingRuleOp = otAND;
                break;

            case BNF_CONSTANT_BEGIN: // <#
                extractNumericConstant(pendingRuleOp);
                pendingRuleOp = otAND;
                break;

            case BNF_OR: // |
                pendingRuleOp = otOR;
                break;

            case BNF_REPEAT_BEGIN: // {
                pendingRuleOp = otREPEAT;
                break;

            case BNF_NO_TOKEN_START: // -'
                extractTerminal(pendingRuleOp, true);
                pendingRuleOp = otAND;
                break;

            case BNF_SINGLEQUOTE: // '
                extractTerminal(pendingRuleOp);
                pendingRuleOp = otAND;
                break;

            case BNF_OPTIONAL_BEGIN: // [
                pendingRuleOp = otOPTIONAL;
                break;

            case BNF_NOT_TEST_BEGIN: // (?!
                pendingRuleOp = otNOT_TEST;
                break;

            case BNF_SET_BEGIN: // (
                extractSet(pendingRuleOp);
                pendingRuleOp = otAND;
                break;

            default:
                // trap closing tokens, i.e. ] } )
                break;
            } // end switch
        } // end if
    } // end while
}

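// Editorial sketch of the conversion, based on the cases above: each BNF
// operator token only records a pending operation; the next terminal or
// non-terminal extracted is stored with that operation, after which the
// pending op reverts to otAND. For a hypothetical client rule
//
//     <thing> ::= 'ID' <name> [<option>]
//
// the resulting rule path would look roughly like:
//
//     otRULE(<thing>), otAND('ID'), otAND(<name>), otOPTIONAL(<option>), otEND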
//-----------------------------------------------------------------------
void Compiler2Pass::modifyLastRule(const OperationType pendingRuleOp, const size_t tokenID)
{
    // add operation using this token ID to the current rule expression
    size_t lastIndex = mClientTokenState->rootRulePath.size();
    if (lastIndex == 0)
    {
        // throw exception since there should have been at least one rule existing
        OGRE_EXCEPT(Exception::ERR_INTERNAL_ERROR, "BNF Grammar build rules failed: no previous rule op defined", "Compiler2Pass::modifyLastRule");
    }
    --lastIndex;
    mClientTokenState->rootRulePath[lastIndex].operation = pendingRuleOp;
    mClientTokenState->rootRulePath[lastIndex].tokenID = tokenID;
    // add new end op token rule
    mClientTokenState->rootRulePath.push_back(TokenRule(otEND, 0));
}

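// Editorial note: modifyLastRule relies on the rule path always ending with
// an otEND marker - it overwrites that marker with the new operation and
// token ID, then appends a fresh otEND, so the path stays terminated. For
// example, adding otAND(tok) to
//
//     [ otRULE(r), otAND(a), otEND ]
//
// leaves
//
//     [ otRULE(r), otAND(a), otAND(tok), otEND ]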
//-----------------------------------------------------------------------
size_t Compiler2Pass::getClientLexemeTokenID(const String& lexeme, const bool isCaseSensitive)
{
    size_t tokenID = mClientTokenState->lexemeTokenMap[lexeme];

    if (tokenID == 0)
    {
        // lexeme not found so a new entry is made by the system
        // note that all lexemes added by the system will not/can not have an action
        tokenID = mClientTokenState->lexemeTokenDefinitions.size();
        // add identifier to client lexeme tokens
        mActiveTokenState = mClientTokenState;
        addLexemeToken(lexeme, tokenID, false, isCaseSensitive);
        mActiveTokenState = &mBNFTokenState;
    }

    return tokenID;
}
//-----------------------------------------------------------------------
void Compiler2Pass::extractNonTerminal(const OperationType pendingRuleOp)
{
    // beginning of identifier
    // next token should be for a label
    const String& identifierLabel = getNextTokenLabel();
    // next token should be id end
    getNextToken(BNF_ID_END);
    // add identifier to lexeme token definitions but keep case sensitivity
    const size_t tokenID = getClientLexemeTokenID(identifierLabel, true);
    LexemeTokenDef& tokenDef = mClientTokenState->lexemeTokenDefinitions[tokenID];

    // peek at the next token instruction to see if this
    // identifier is for a new rule or is part of the current rule
    if (testNextTokenID(BNF_SET_RULE))
    {
        // consume set rule
        getNextToken(BNF_SET_RULE);
        // check to make sure this is the first time this rule is being set up by
        // verifying rule id is 0
        if (tokenDef.ruleID != 0)
        {
            // this is not the first time for this identifier to be set up as a rule
            // since duplicate rules cannot exist, throw an exception
            OGRE_EXCEPT(Exception::ERR_DUPLICATE_ITEM, "while parsing BNF grammar for: " +
                getClientGrammerName() +
                ", an attempt was made to assign a rule to identifier: " +
                tokenDef.lexeme + ", that already had a rule assigned",
                "Compiler2Pass::extractNonTerminal");
        }
        // add new rule to end of rule path
        mClientTokenState->rootRulePath.push_back(TokenRule(otRULE, tokenID));
        tokenDef.ruleID = mClientTokenState->rootRulePath.size() - 1;
        // add new end op token rule
        mClientTokenState->rootRulePath.push_back(TokenRule(otEND, 0));
    }
    else // just a reference to a non-terminal
    {
        modifyLastRule(pendingRuleOp, tokenID);
    }

    tokenDef.isNonTerminal = true;
}
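// Editorial example, based on the logic above: in a grammar fragment such as
//
//     <decl> ::= <name> <name>
//
// the leading <decl> is followed by ::= and therefore opens a new rule
// (otRULE plus terminating otEND), while each <name> on the right-hand side
// is only a reference folded into the current rule via modifyLastRule; its
// own rule body may be defined later in the grammar text.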
//-----------------------------------------------------------------------
void Compiler2Pass::extractTerminal(const OperationType pendingRuleOp, const bool notoken)
{
    // beginning of label
    // next token should be for a label
    const String& terminalLabel = getNextTokenLabel();
    // next token should be single quote end
    getNextToken(BNF_SINGLEQUOTE);
    // add terminal to lexeme token definitions
    // note that if label not in the map it is automatically added
    const size_t tokenID = getClientLexemeTokenID(terminalLabel);
    if (notoken)
        modifyLastRule(otAND, _no_token_);
    modifyLastRule(pendingRuleOp, tokenID);
}
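// Editorial note, hedged: for a quoted terminal such as 'vertex_program' the
// lexeme is registered (case-insensitive by default) and appended to the
// current rule with the pending op. When notoken is true, an
// otAND(_no_token_) entry is inserted first, which presumably marks the
// terminal as one to match without placing a token for it in the pass 2
// queue.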
//-----------------------------------------------------------------------
void Compiler2Pass::extractSet(const OperationType pendingRuleOp)
{
    const String& setLabel = getNextTokenLabel();
    // next token should be )
    getNextToken(BNF_SET_END);
    // add set to lexeme token definitions but keep case sensitivity
    const size_t tokenID = getClientLexemeTokenID(setLabel, true);
    // add operation using this token ID to the current rule expression
    modifyLastRule(pendingRuleOp, _character_);
    // add the data required by the character lookup operation
    modifyLastRule(otDATA, tokenID);
}
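// Editorial example: a character set in the grammar such as
//
//     (0123456789)
//
// is stored as two rule entries: the pending operation carrying the system
// token _character_, followed by an otDATA entry carrying the token ID of
// the set's lexeme, which the character lookup uses at match time.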
//-----------------------------------------------------------------------
void Compiler2Pass::extractNumericConstant(const OperationType pendingRuleOp)
{
    // consume label for constant, don't need it for anything
    getNextTokenLabel();

    getNextToken(BNF_ID_END); // >
    // add the value operation to the current rule expression
    modifyLastRule(pendingRuleOp, _value_);
}


} // namespace Ogre