source: NonGTP/Xerces/xerces/include/xercesc/internal/ReaderMgr.hpp @ 358

Revision 358, 18.5 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 1999-2000,2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Log: ReaderMgr.hpp,v $
19 * Revision 1.14  2004/09/08 13:56:13  peiyongz
20 * Apache License Version 2.0
21 *
22 * Revision 1.13  2004/06/14 15:18:52  peiyongz
23 * Consolidated End Of Line Handling
24 *
25 * Revision 1.12  2004/06/03 15:38:27  peiyongz
26 * XML1.1:  The characters #x85 and #x2028 cannot be reliably recognized
27 * and translated until an entity's encoding declaration (if present) has been
28 * read. Therefore, it is a fatal error to use them within the XML declaration or
29 * text declaration.
30 *
31 * Revision 1.11  2004/01/29 11:46:30  cargilld
32 * Code cleanup changes to get rid of various compiler diagnostic messages.
33 *
34 * Revision 1.10  2003/08/14 02:56:41  knoaman
35 * Code refactoring to improve performance of validation.
36 *
37 * Revision 1.9  2003/05/16 21:36:57  knoaman
38 * Memory manager implementation: Modify constructors to pass in the memory manager.
39 *
40 * Revision 1.8  2003/05/15 18:26:29  knoaman
41 * Partial implementation of the configurable memory manager.
42 *
43 * Revision 1.7  2003/01/03 20:08:39  tng
44 * New feature StandardUriConformant to force strict standard uri conformance.
45 *
46 * Revision 1.6  2002/12/20 22:09:56  tng
47 * XML 1.1
48 *
49 * Revision 1.5  2002/12/03 15:31:19  knoaman
50 * Enable/disable calculation of src offset.
51 *
52 * Revision 1.4  2002/11/04 14:58:18  tng
53 * C++ Namespace Support.
54 *
55 * Revision 1.3  2002/08/22 19:28:11  tng
56 * [Bug 11448] DomCount has problems with XHTML1.1 DTD.
57 *
58 * Revision 1.2  2002/05/27 18:42:14  tng
59 * To get ready for 64 bit large file, use XMLSSize_t to represent line and column number.
60 *
61 * Revision 1.1.1.1  2002/02/01 22:21:58  peiyongz
62 * sane_include
63 *
64 * Revision 1.13  2001/07/12 18:50:08  tng
65 * Some performance modification regarding standalone check and xml decl check.
66 *
67 * Revision 1.12  2000/09/09 00:18:18  andyh
68 * Reordered member variables in ThrowEOEJanitor.  Patch submitted
69 * by Kirk Wylie.
70 *
71 * Revision 1.11  2000/07/08 00:17:13  andyh
72 * Cleanup of yesterday's speedup changes.  Merged new bit into the
73 * scanner character properties table.
74 *
75 * Revision 1.10  2000/07/07 01:08:44  andyh
76 * Parser speed up in scan of XML content.
77 *
78 * Revision 1.9  2000/03/02 19:54:29  roddey
79 * This checkin includes many changes done while waiting for the
80 * 1.1.0 code to be finished. I can't list them all here, but a list is
81 * available elsewhere.
82 *
83 * Revision 1.8  2000/02/24 20:18:07  abagchi
84 * Swat for removing Log from API docs
85 *
86 * Revision 1.7  2000/02/24 02:12:53  aruna1
87 * ReaderMgr::getReaderDepth() added
88 *
89 * Revision 1.6  2000/02/06 07:47:53  rahulj
90 * Year 2K copyright swat.
91 *
92 * Revision 1.5  2000/01/25 01:04:21  roddey
93 * Fixes a bogus error about ]]> in char data.
94 *
95 * Revision 1.4  2000/01/24 20:40:43  roddey
96 * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
97 * is not tested yet, but I wanted to get the API changes in now so that the API
98 * can be stabilized.
99 *
100 * Revision 1.3  2000/01/12 00:15:04  roddey
101 * Changes to deal with multiply nested, relative pathed, entities and to deal
102 * with the new URL class changes.
103 *
104 * Revision 1.2  1999/12/15 19:48:03  roddey
105 * Changed to use new split of transcoder interfaces into XML transcoders and
106 * LCP transcoders, and implementation of intrinsic transcoders as pluggable
107 * transcoders, and addition of Latin1 intrinsic support.
108 *
109 * Revision 1.1.1.1  1999/11/09 01:08:13  twl
110 * Initial checkin
111 *
112 * Revision 1.4  1999/11/08 20:56:54  droddey
113 * If the main xml entity does not exist, we need to get the error handling for that
114 * inside the main XMLScanner::scanDocument() try block so that it gets reported
115 * in the normal way. We have to add a little extra safety code because, when this
116 * happens, there is no reader on the reader stack to get position info from.
117 *
118 * Revision 1.3  1999/11/08 20:44:43  rahul
119 * Swat for adding in Product name and CVS comment log variable.
120 *
121 */
122
123#if !defined(READERMGR_HPP)
124#define READERMGR_HPP
125
126#include <xercesc/internal/XMLReader.hpp>
127#include <xercesc/util/PlatformUtils.hpp>
128#include <xercesc/util/RefStackOf.hpp>
129#include <xercesc/sax/Locator.hpp>
130#include <xercesc/framework/XMLBuffer.hpp>
131
132XERCES_CPP_NAMESPACE_BEGIN
133
134class XMLEntityDecl;
135class XMLEntityHandler;
136class XMLDocumentHandler;
137class XMLScanner;
138
139
140// ---------------------------------------------------------------------------
141//  This class is used by the scanner. The scanner must deal with expansion
142//  of entities, some of which are totally different files (external parsed
143//  entities.) It does so by pushing readers onto a stack. The top reader is
144//  the one it wants to read out of, but that one must be popped when it is
145//  empty. To keep that logic from being all over the place, the scanner
146//  talks to the reader manager, which handles the stack and popping off
147//  used up readers.
148// ---------------------------------------------------------------------------
149class XMLPARSER_EXPORT ReaderMgr :   public XMemory
150                                   , public Locator
151{
152public :
153    // -----------------------------------------------------------------------
154    //  Class specific types
155    // -----------------------------------------------------------------------
156    struct LastExtEntityInfo : public XMemory
157    {
158        const   XMLCh*          systemId;
159        const   XMLCh*          publicId;
160                XMLSSize_t      lineNumber;
161                XMLSSize_t      colNumber;
162    };
163
164
165    // -----------------------------------------------------------------------
166    //  Constructors and Destructor
167    // -----------------------------------------------------------------------
168    ReaderMgr(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
169    ~ReaderMgr();
170
171
172    // -----------------------------------------------------------------------
173    //  Convenience scanning methods
174    //
175    //  This are all convenience methods that work in terms of the core
176    //  character spooling methods.
177    // -----------------------------------------------------------------------
178    bool atEOF() const;
179    bool getName(XMLBuffer& toFill);
180    bool getNameToken(XMLBuffer& toFill);
181    XMLCh getNextChar();
182    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
183    void movePlainContentChars(XMLBuffer &dest);
184    void getSpaces(XMLBuffer& toFill);
185    void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
186    bool isEmpty() const;
187    bool lookingAtChar(const XMLCh toCheck);
188    bool lookingAtSpace();
189    XMLCh peekNextChar();
190    bool skipIfQuote(XMLCh& chGotten);
191    void skipPastChar(const XMLCh toSkip);
192    bool skipPastSpaces(bool inDecl = false);
193    void skipToChar(const XMLCh toSkipTo);
194    bool skippedChar(const XMLCh toSkip);
195    bool skippedSpace();
196    bool skippedString(const XMLCh* const toSkip);
197    void skipQuotedString(const XMLCh quoteCh);
198    XMLCh skipUntilIn(const XMLCh* const listToSkip);
199    XMLCh skipUntilInOrWS(const XMLCh* const listToSkip);
200    bool peekString(const XMLCh* const toPeek);
201
202
203    // -----------------------------------------------------------------------
204    //  Control methods
205    // -----------------------------------------------------------------------
206    void cleanStackBackTo(const unsigned int readerNum);
207    XMLReader* createReader
208    (
209        const   InputSource&        src
210        , const bool                xmlDecl
211        , const XMLReader::RefFrom  refFrom
212        , const XMLReader::Types    type
213        , const XMLReader::Sources  source
214        , const bool                calcSrsOfs = true
215    );
216    XMLReader* createReader
217    (
218        const   XMLCh* const        sysId
219        , const XMLCh* const        pubId
220        , const bool                xmlDecl
221        , const XMLReader::RefFrom  refFrom
222        , const XMLReader::Types    type
223        , const XMLReader::Sources  source
224        ,       InputSource*&       srcToFill
225        , const bool                calcSrcOfs = true
226    );
227    XMLReader* createReader
228    (
229        const   XMLCh* const        baseURI
230        , const XMLCh* const        sysId
231        , const XMLCh* const        pubId
232        , const bool                xmlDecl
233        , const XMLReader::RefFrom  refFrom
234        , const XMLReader::Types    type
235        , const XMLReader::Sources  source
236        ,       InputSource*&       srcToFill
237        , const bool                calcSrcOfs = true
238    );
239    XMLReader* createIntEntReader
240    (
241        const   XMLCh* const        sysId
242        , const XMLReader::RefFrom  refFrom
243        , const XMLReader::Types    type
244        , const XMLCh* const        dataBuf
245        , const unsigned int        dataLen
246        , const bool                copyBuf
247        , const bool                calcSrcOfs = true
248    );
249    bool isScanningPERefOutOfLiteral() const;
250    bool pushReader
251    (
252                XMLReader* const        reader
253        ,       XMLEntityDecl* const    entity
254    );
255    void reset();
256
257
258    // -----------------------------------------------------------------------
259    //  Getter methods
260    // -----------------------------------------------------------------------
261    const XMLCh* getCurrentEncodingStr() const;
262    const XMLEntityDecl* getCurrentEntity() const;
263    XMLEntityDecl* getCurrentEntity();
264    const XMLReader* getCurrentReader() const;
265    XMLReader* getCurrentReader();
266    unsigned int getCurrentReaderNum() const;
267    unsigned int getReaderDepth() const;
268    void getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const;
269    unsigned int getSrcOffset() const;
270    bool getThrowEOE() const;
271
272
273    // -----------------------------------------------------------------------
274    //  Setter methods
275    // -----------------------------------------------------------------------
276    void setEntityHandler(XMLEntityHandler* const newHandler);
277    void setThrowEOE(const bool newValue);
278    void setXMLVersion(const XMLReader::XMLVersion version);
279    void setStandardUriConformant(const bool newValue);
280
281    // -----------------------------------------------------------------------
282    //  Implement the SAX Locator interface
283    // -----------------------------------------------------------------------
284    virtual const XMLCh* getPublicId() const;
285    virtual const XMLCh* getSystemId() const;
286    virtual XMLSSize_t getLineNumber() const;
287    virtual XMLSSize_t getColumnNumber() const;
288
289
290private :
291    // -----------------------------------------------------------------------
292    //  Private helper methods
293    // -----------------------------------------------------------------------
294    const XMLReader* getLastExtEntity(const XMLEntityDecl*& itsEntity) const;
295    bool popReader();
296
297    // -----------------------------------------------------------------------
298    //  Unimplemented constructors and operators
299    // -----------------------------------------------------------------------
300    ReaderMgr(const ReaderMgr&);
301    ReaderMgr& operator=(const ReaderMgr&);
302
303    // -----------------------------------------------------------------------
304    //  Private data members
305    //
306    //  fCurEntity
307    //      This is the current top of stack entity. We pull it off the stack
308    //      and store it here for efficiency.
309    //
310    //  fCurReader
311    //      This is the current top of stack reader. We pull it off the
312    //      stack and store it here for efficiency.
313    //
314    //  fEntityHandler
315    //      This is the installed entity handler. Its installed via the
316    //      scanner but he passes it on to us since we need it the most, in
317    //      process of creating external entity readers.
318    //
319    //  fEntityStack
320    //      We need to keep up with which of the pushed readers are pushed
321    //      entity values that are being spooled. This is done to avoid the
322    //      problem of recursive definitions. This stack consists of refs to
323    //      EntityDecl objects for the pushed entities.
324    //
325    //  fNextReaderNum
326    //      This is the reader serial number value. Each new reader that is
327    //      created from this reader is given a successive number. This lets
328    //      us catch things like partial markup errors and such.
329    //
330    //  fReaderStack
331    //      This is the stack of reader references. We own all the readers
332    //      and destroy them when they are used up.
333    //
334    //  fThrowEOE
335    //      This flag controls whether we throw an exception when we hit an
336    //      end of entity. The scanner doesn't really need to know about ends
337    //      of entities in the int/ext subsets, so it will turn this flag off
338    //      until it gets into the content usually.
339    //
340    //  fXMLVersion
341    //      Enum to indicate if each Reader should be created as XML 1.1 or
342    //      XML 1.0 conformant
343    //
344    //  fStandardUriConformant
345    //      This flag controls whether we force conformant URI
346    // -----------------------------------------------------------------------
347    XMLEntityDecl*              fCurEntity;
348    XMLReader*                  fCurReader;
349    XMLEntityHandler*           fEntityHandler;
350    RefStackOf<XMLEntityDecl>*  fEntityStack;
351    unsigned int                fNextReaderNum;
352    RefStackOf<XMLReader>*      fReaderStack;
353    bool                        fThrowEOE;
354    XMLReader::XMLVersion       fXMLVersion;
355    bool                        fStandardUriConformant;
356    MemoryManager*              fMemoryManager;
357};
358
359
360
361// ---------------------------------------------------------------------------
362//  ReaderMgr: Inlined methods
363//
364//  NOTE: We cannot put these in alphabetical and type order as we usually
365//  do because some of the compilers we have to support are too stupid to
366//  understand out of order inlines!
367// ---------------------------------------------------------------------------
368inline unsigned int ReaderMgr::getCurrentReaderNum() const
369{
370    return fCurReader->getReaderNum();
371}
372
373inline const XMLReader* ReaderMgr::getCurrentReader() const
374{
375    return fCurReader;
376}
377
378inline XMLReader* ReaderMgr::getCurrentReader()
379{
380    return fCurReader;
381}
382
383inline bool ReaderMgr::getName(XMLBuffer& toFill)
384{
385    toFill.reset();
386    return fCurReader->getName(toFill, false);
387}
388
389inline bool ReaderMgr::getNameToken(XMLBuffer& toFill)
390{
391    toFill.reset();
392    return fCurReader->getName(toFill, true);
393}
394
395inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
396{
397    return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
398}
399
400inline void ReaderMgr::movePlainContentChars(XMLBuffer &dest)
401{
402    fCurReader->movePlainContentChars(dest);
403}
404
405inline bool ReaderMgr::getThrowEOE() const
406{
407    return fThrowEOE;
408}
409
410inline unsigned int ReaderMgr::getSrcOffset() const
411{
412    return fCurReader->getSrcOffset();
413}
414
415inline bool ReaderMgr::lookingAtChar(const XMLCh chToCheck)
416{
417    return (chToCheck == peekNextChar());
418}
419
420inline bool ReaderMgr::lookingAtSpace()
421{
422    XMLCh c = peekNextChar();
423    return fCurReader->isWhitespace(c);
424}
425
426inline void ReaderMgr::setThrowEOE(const bool newValue)
427{
428    fThrowEOE = newValue;
429}
430
431inline void ReaderMgr::setStandardUriConformant(const bool newValue)
432{
433    fStandardUriConformant = newValue;
434}
435
436inline bool ReaderMgr::skippedString(const XMLCh* const toSkip)
437{
438    return fCurReader->skippedString(toSkip);
439}
440
441inline void ReaderMgr::skipToChar(const XMLCh toSkipTo)
442{
443    while (true)
444    {
445        // Get chars until we find the one to skip
446        const XMLCh nextCh = getNextChar();
447
448        // Break out at end of input or the char to skip
449        if ((nextCh == toSkipTo) || !nextCh)
450            break;
451    }
452}
453
454inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast)
455{
456    while (true)
457    {
458        // Get chars until we find the one to skip
459        const XMLCh nextCh = getNextChar();
460
461        if ((nextCh == toSkipPast) || !nextCh)
462            break;
463    }
464}
465
466inline bool ReaderMgr::peekString(const XMLCh* const toPeek)
467{
468    return fCurReader->peekString(toPeek);
469}
470
471inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler)
472{
473    fEntityHandler = newHandler;
474}
475
476inline void ReaderMgr::setXMLVersion(const XMLReader::XMLVersion version)
477{
478    fXMLVersion = version;
479    fCurReader->setXMLVersion(version);
480}
481
482//
483//  This is a simple class to temporarily change the 'throw at end of entity'
484//  flag of the reader manager. There are some places where we need to
485//  turn this on and off on a scoped basis.
486//
487class XMLPARSER_EXPORT ThrowEOEJanitor
488{
489public :
490    // -----------------------------------------------------------------------
491    //  Constructors and destructor
492    // -----------------------------------------------------------------------
493    ThrowEOEJanitor(ReaderMgr* mgrTarget, const bool newValue) :
494
495        fOld(mgrTarget->getThrowEOE())
496        , fMgr(mgrTarget)
497    {
498        mgrTarget->setThrowEOE(newValue);
499    }
500
501    ~ThrowEOEJanitor()
502    {
503        fMgr->setThrowEOE(fOld);
504    };
505
506private :
507    // -----------------------------------------------------------------------
508    //  Unimplemented constructors and operators
509    // -----------------------------------------------------------------------   
510    ThrowEOEJanitor(const ThrowEOEJanitor&);
511    ThrowEOEJanitor& operator=(const ThrowEOEJanitor&);
512
513    // -----------------------------------------------------------------------
514    //  Private data members
515    //
516    //  fOld
517    //      The previous value of the flag, which we replaced during ctor,
518    //      and will replace during dtor.
519    //
520    //  fMgr
521    //      A pointer to the reader manager we are going to set/reset the
522    //      flag on.
523    // -----------------------------------------------------------------------
524    bool        fOld;
525    ReaderMgr*  fMgr;
526};
527
528XERCES_CPP_NAMESPACE_END
529
530#endif
Note: See TracBrowser for help on using the repository browser.