source: NonGTP/Xerces/xercesc/internal/ReaderMgr.hpp @ 188

Revision 188, 20.0 KB checked in by mattausch, 19 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 1999-2000 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Log: ReaderMgr.hpp,v $
59 * Revision 1.11  2004/01/29 11:46:30  cargilld
60 * Code cleanup changes to get rid of various compiler diagnostic messages.
61 *
62 * Revision 1.10  2003/08/14 02:56:41  knoaman
63 * Code refactoring to improve performance of validation.
64 *
65 * Revision 1.9  2003/05/16 21:36:57  knoaman
66 * Memory manager implementation: Modify constructors to pass in the memory manager.
67 *
68 * Revision 1.8  2003/05/15 18:26:29  knoaman
69 * Partial implementation of the configurable memory manager.
70 *
71 * Revision 1.7  2003/01/03 20:08:39  tng
72 * New feature StandardUriConformant to force strict standard uri conformance.
73 *
74 * Revision 1.6  2002/12/20 22:09:56  tng
75 * XML 1.1
76 *
77 * Revision 1.5  2002/12/03 15:31:19  knoaman
78 * Enable/disable calculation of src offset.
79 *
80 * Revision 1.4  2002/11/04 14:58:18  tng
81 * C++ Namespace Support.
82 *
83 * Revision 1.3  2002/08/22 19:28:11  tng
84 * [Bug 11448] DomCount has problems with XHTML1.1 DTD.
85 *
86 * Revision 1.2  2002/05/27 18:42:14  tng
87 * To get ready for 64 bit large file, use XMLSSize_t to represent line and column number.
88 *
89 * Revision 1.1.1.1  2002/02/01 22:21:58  peiyongz
90 * sane_include
91 *
92 * Revision 1.13  2001/07/12 18:50:08  tng
93 * Some performance modification regarding standalone check and xml decl check.
94 *
95 * Revision 1.12  2000/09/09 00:18:18  andyh
96 * Reordered member variables in ThrowEOEJanitor.  Patch submitted
97 * by Kirk Wylie.
98 *
99 * Revision 1.11  2000/07/08 00:17:13  andyh
100 * Cleanup of yesterday's speedup changes.  Merged new bit into the
101 * scanner character properties table.
102 *
103 * Revision 1.10  2000/07/07 01:08:44  andyh
104 * Parser speed up in scan of XML content.
105 *
106 * Revision 1.9  2000/03/02 19:54:29  roddey
107 * This checkin includes many changes done while waiting for the
108 * 1.1.0 code to be finished. I can't list them all here, but a list is
109 * available elsewhere.
110 *
111 * Revision 1.8  2000/02/24 20:18:07  abagchi
112 * Swat for removing Log from API docs
113 *
114 * Revision 1.7  2000/02/24 02:12:53  aruna1
115 * ReaderMgr::getReaderDepth() added
116 *
117 * Revision 1.6  2000/02/06 07:47:53  rahulj
118 * Year 2K copyright swat.
119 *
120 * Revision 1.5  2000/01/25 01:04:21  roddey
121 * Fixes a bogus error about ]]> in char data.
122 *
123 * Revision 1.4  2000/01/24 20:40:43  roddey
124 * Exposed the APIs to get to the byte offset in the source XML buffer. This stuff
125 * is not tested yet, but I wanted to get the API changes in now so that the API
126 * can be stabilized.
127 *
128 * Revision 1.3  2000/01/12 00:15:04  roddey
129 * Changes to deal with multiply nested, relative pathed, entities and to deal
130 * with the new URL class changes.
131 *
132 * Revision 1.2  1999/12/15 19:48:03  roddey
133 * Changed to use new split of transcoder interfaces into XML transcoders and
134 * LCP transcoders, and implementation of intrinsic transcoders as pluggable
135 * transcoders, and addition of Latin1 intrinsic support.
136 *
137 * Revision 1.1.1.1  1999/11/09 01:08:13  twl
138 * Initial checkin
139 *
140 * Revision 1.4  1999/11/08 20:56:54  droddey
141 * If the main xml entity does not exist, we need to get the error handling for that
142 * inside the main XMLScanner::scanDocument() try block so that it gets reported
143 * in the normal way. We have to add a little extra safety code because, when this
144 * happens, there is no reader on the reader stack to get position info from.
145 *
146 * Revision 1.3  1999/11/08 20:44:43  rahul
147 * Swat for adding in Product name and CVS comment log variable.
148 *
149 */
150
151#if !defined(READERMGR_HPP)
152#define READERMGR_HPP
153
154#include <xercesc/internal/XMLReader.hpp>
155#include <xercesc/util/PlatformUtils.hpp>
156#include <xercesc/util/RefStackOf.hpp>
157#include <xercesc/sax/Locator.hpp>
158#include <xercesc/framework/XMLBuffer.hpp>
159
160XERCES_CPP_NAMESPACE_BEGIN
161
162class XMLEntityDecl;
163class XMLEntityHandler;
164class XMLDocumentHandler;
165class XMLScanner;
166
167
168// ---------------------------------------------------------------------------
169//  This class is used by the scanner. The scanner must deal with expansion
170//  of entities, some of which are totally different files (external parsed
171//  entities.) It does so by pushing readers onto a stack. The top reader is
172//  the one it wants to read out of, but that one must be popped when it is
173//  empty. To keep that logic from being all over the place, the scanner
174//  talks to the reader manager, which handles the stack and popping off
175//  used up readers.
176// ---------------------------------------------------------------------------
177class XMLPARSER_EXPORT ReaderMgr :   public XMemory
178                                   , public Locator
179{
180public :
181    // -----------------------------------------------------------------------
182    //  Class specific types
183    // -----------------------------------------------------------------------
184    struct LastExtEntityInfo : public XMemory
185    {
186        const   XMLCh*          systemId;
187        const   XMLCh*          publicId;
188                XMLSSize_t      lineNumber;
189                XMLSSize_t      colNumber;
190    };
191
192
193    // -----------------------------------------------------------------------
194    //  Constructors and Destructor
195    // -----------------------------------------------------------------------
196    ReaderMgr(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
197    ~ReaderMgr();
198
199
200    // -----------------------------------------------------------------------
201    //  Convenience scanning methods
202    //
203    //  This are all convenience methods that work in terms of the core
204    //  character spooling methods.
205    // -----------------------------------------------------------------------
206    bool atEOF() const;
207    bool getName(XMLBuffer& toFill);
208    bool getNameToken(XMLBuffer& toFill);
209    XMLCh getNextChar();
210    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
211    void movePlainContentChars(XMLBuffer &dest);
212    void getSpaces(XMLBuffer& toFill);
213    void getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
214    bool isEmpty() const;
215    bool lookingAtChar(const XMLCh toCheck);
216    bool lookingAtSpace();
217    XMLCh peekNextChar();
218    bool skipIfQuote(XMLCh& chGotten);
219    void skipPastChar(const XMLCh toSkip);
220    bool skipPastSpaces();
221    void skipToChar(const XMLCh toSkipTo);
222    bool skippedChar(const XMLCh toSkip);
223    bool skippedSpace();
224    bool skippedString(const XMLCh* const toSkip);
225    void skipQuotedString(const XMLCh quoteCh);
226    XMLCh skipUntilIn(const XMLCh* const listToSkip);
227    XMLCh skipUntilInOrWS(const XMLCh* const listToSkip);
228    bool peekString(const XMLCh* const toPeek);
229
230
231    // -----------------------------------------------------------------------
232    //  Control methods
233    // -----------------------------------------------------------------------
234    void cleanStackBackTo(const unsigned int readerNum);
235    XMLReader* createReader
236    (
237        const   InputSource&        src
238        , const bool                xmlDecl
239        , const XMLReader::RefFrom  refFrom
240        , const XMLReader::Types    type
241        , const XMLReader::Sources  source
242        , const bool                calcSrsOfs = true
243    );
244    XMLReader* createReader
245    (
246        const   XMLCh* const        sysId
247        , const XMLCh* const        pubId
248        , const bool                xmlDecl
249        , const XMLReader::RefFrom  refFrom
250        , const XMLReader::Types    type
251        , const XMLReader::Sources  source
252        ,       InputSource*&       srcToFill
253        , const bool                calcSrcOfs = true
254    );
255    XMLReader* createReader
256    (
257        const   XMLCh* const        baseURI
258        , const XMLCh* const        sysId
259        , const XMLCh* const        pubId
260        , const bool                xmlDecl
261        , const XMLReader::RefFrom  refFrom
262        , const XMLReader::Types    type
263        , const XMLReader::Sources  source
264        ,       InputSource*&       srcToFill
265        , const bool                calcSrcOfs = true
266    );
267    XMLReader* createIntEntReader
268    (
269        const   XMLCh* const        sysId
270        , const XMLReader::RefFrom  refFrom
271        , const XMLReader::Types    type
272        , const XMLCh* const        dataBuf
273        , const unsigned int        dataLen
274        , const bool                copyBuf
275        , const bool                calcSrcOfs = true
276    );
277    bool isScanningPERefOutOfLiteral() const;
278    bool pushReader
279    (
280                XMLReader* const        reader
281        ,       XMLEntityDecl* const    entity
282    );
283    void reset();
284
285
286    // -----------------------------------------------------------------------
287    //  Getter methods
288    // -----------------------------------------------------------------------
289    const XMLCh* getCurrentEncodingStr() const;
290    const XMLEntityDecl* getCurrentEntity() const;
291    XMLEntityDecl* getCurrentEntity();
292    const XMLReader* getCurrentReader() const;
293    XMLReader* getCurrentReader();
294    unsigned int getCurrentReaderNum() const;
295    unsigned int getReaderDepth() const;
296    void getLastExtEntityInfo(LastExtEntityInfo& lastInfo) const;
297    unsigned int getSrcOffset() const;
298    bool getThrowEOE() const;
299
300
301    // -----------------------------------------------------------------------
302    //  Setter methods
303    // -----------------------------------------------------------------------
304    void setEntityHandler(XMLEntityHandler* const newHandler);
305    void setThrowEOE(const bool newValue);
306    void setXMLVersion(const XMLReader::XMLVersion version);
307    void setStandardUriConformant(const bool newValue);
308
309    // -----------------------------------------------------------------------
310    //  Implement the SAX Locator interface
311    // -----------------------------------------------------------------------
312    virtual const XMLCh* getPublicId() const;
313    virtual const XMLCh* getSystemId() const;
314    virtual XMLSSize_t getLineNumber() const;
315    virtual XMLSSize_t getColumnNumber() const;
316
317
318private :
319    // -----------------------------------------------------------------------
320    //  Private helper methods
321    // -----------------------------------------------------------------------
322    const XMLReader* getLastExtEntity(const XMLEntityDecl*& itsEntity) const;
323    bool popReader();
324
325    // -----------------------------------------------------------------------
326    //  Unimplemented constructors and operators
327    // -----------------------------------------------------------------------
328    ReaderMgr(const ReaderMgr&);
329    ReaderMgr& operator=(const ReaderMgr&);
330
331    // -----------------------------------------------------------------------
332    //  Private data members
333    //
334    //  fCurEntity
335    //      This is the current top of stack entity. We pull it off the stack
336    //      and store it here for efficiency.
337    //
338    //  fCurReader
339    //      This is the current top of stack reader. We pull it off the
340    //      stack and store it here for efficiency.
341    //
342    //  fEntityHandler
343    //      This is the installed entity handler. Its installed via the
344    //      scanner but he passes it on to us since we need it the most, in
345    //      process of creating external entity readers.
346    //
347    //  fEntityStack
348    //      We need to keep up with which of the pushed readers are pushed
349    //      entity values that are being spooled. This is done to avoid the
350    //      problem of recursive definitions. This stack consists of refs to
351    //      EntityDecl objects for the pushed entities.
352    //
353    //  fNextReaderNum
354    //      This is the reader serial number value. Each new reader that is
355    //      created from this reader is given a successive number. This lets
356    //      us catch things like partial markup errors and such.
357    //
358    //  fReaderStack
359    //      This is the stack of reader references. We own all the readers
360    //      and destroy them when they are used up.
361    //
362    //  fThrowEOE
363    //      This flag controls whether we throw an exception when we hit an
364    //      end of entity. The scanner doesn't really need to know about ends
365    //      of entities in the int/ext subsets, so it will turn this flag off
366    //      until it gets into the content usually.
367    //
368    //  fXMLVersion
369    //      Enum to indicate if each Reader should be created as XML 1.1 or
370    //      XML 1.0 conformant
371    //
372    //  fStandardUriConformant
373    //      This flag controls whether we force conformant URI
374    // -----------------------------------------------------------------------
375    XMLEntityDecl*              fCurEntity;
376    XMLReader*                  fCurReader;
377    XMLEntityHandler*           fEntityHandler;
378    RefStackOf<XMLEntityDecl>*  fEntityStack;
379    unsigned int                fNextReaderNum;
380    RefStackOf<XMLReader>*      fReaderStack;
381    bool                        fThrowEOE;
382    XMLReader::XMLVersion       fXMLVersion;
383    bool                        fStandardUriConformant;
384    MemoryManager*              fMemoryManager;
385};
386
387
388
389// ---------------------------------------------------------------------------
390//  ReaderMgr: Inlined methods
391//
392//  NOTE: We cannot put these in alphabetical and type order as we usually
393//  do because some of the compilers we have to support are too stupid to
394//  understand out of order inlines!
395// ---------------------------------------------------------------------------
396inline unsigned int ReaderMgr::getCurrentReaderNum() const
397{
398    return fCurReader->getReaderNum();
399}
400
401inline const XMLReader* ReaderMgr::getCurrentReader() const
402{
403    return fCurReader;
404}
405
406inline XMLReader* ReaderMgr::getCurrentReader()
407{
408    return fCurReader;
409}
410
411inline bool ReaderMgr::getName(XMLBuffer& toFill)
412{
413    toFill.reset();
414    return fCurReader->getName(toFill, false);
415}
416
417inline bool ReaderMgr::getNameToken(XMLBuffer& toFill)
418{
419    toFill.reset();
420    return fCurReader->getName(toFill, true);
421}
422
423inline bool ReaderMgr::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
424{
425    return fCurReader->getNextCharIfNot(chNotToGet, chGotten);
426}
427
428inline void ReaderMgr::movePlainContentChars(XMLBuffer &dest)
429{
430    fCurReader->movePlainContentChars(dest);
431}
432
433inline bool ReaderMgr::getThrowEOE() const
434{
435    return fThrowEOE;
436}
437
438inline unsigned int ReaderMgr::getSrcOffset() const
439{
440    return fCurReader->getSrcOffset();
441}
442
443inline bool ReaderMgr::lookingAtChar(const XMLCh chToCheck)
444{
445    return (chToCheck == peekNextChar());
446}
447
448inline bool ReaderMgr::lookingAtSpace()
449{
450    XMLCh c = peekNextChar();
451    return fCurReader->isWhitespace(c);
452}
453
454inline void ReaderMgr::setThrowEOE(const bool newValue)
455{
456    fThrowEOE = newValue;
457}
458
459inline void ReaderMgr::setStandardUriConformant(const bool newValue)
460{
461    fStandardUriConformant = newValue;
462}
463
464inline bool ReaderMgr::skippedString(const XMLCh* const toSkip)
465{
466    return fCurReader->skippedString(toSkip);
467}
468
469inline void ReaderMgr::skipToChar(const XMLCh toSkipTo)
470{
471    while (true)
472    {
473        // Get chars until we find the one to skip
474        const XMLCh nextCh = getNextChar();
475
476        // Break out at end of input or the char to skip
477        if ((nextCh == toSkipTo) || !nextCh)
478            break;
479    }
480}
481
482inline void ReaderMgr::skipPastChar(const XMLCh toSkipPast)
483{
484    while (true)
485    {
486        // Get chars until we find the one to skip
487        const XMLCh nextCh = getNextChar();
488
489        if ((nextCh == toSkipPast) || !nextCh)
490            break;
491    }
492}
493
494inline bool ReaderMgr::peekString(const XMLCh* const toPeek)
495{
496    return fCurReader->peekString(toPeek);
497}
498
499inline void ReaderMgr::setEntityHandler(XMLEntityHandler* const newHandler)
500{
501    fEntityHandler = newHandler;
502}
503
504inline void ReaderMgr::setXMLVersion(const XMLReader::XMLVersion version)
505{
506    fXMLVersion = version;
507    fCurReader->setXMLVersion(version);
508}
509
510//
511//  This is a simple class to temporarily change the 'throw at end of entity'
512//  flag of the reader manager. There are some places where we need to
513//  turn this on and off on a scoped basis.
514//
515class XMLPARSER_EXPORT ThrowEOEJanitor
516{
517public :
518    // -----------------------------------------------------------------------
519    //  Constructors and destructor
520    // -----------------------------------------------------------------------
521    ThrowEOEJanitor(ReaderMgr* mgrTarget, const bool newValue) :
522
523        fOld(mgrTarget->getThrowEOE())
524        , fMgr(mgrTarget)
525    {
526        mgrTarget->setThrowEOE(newValue);
527    }
528
529    ~ThrowEOEJanitor()
530    {
531        fMgr->setThrowEOE(fOld);
532    };
533
534private :
535    // -----------------------------------------------------------------------
536    //  Unimplemented constructors and operators
537    // -----------------------------------------------------------------------   
538    ThrowEOEJanitor(const ThrowEOEJanitor&);
539    ThrowEOEJanitor& operator=(const ThrowEOEJanitor&);
540
541    // -----------------------------------------------------------------------
542    //  Private data members
543    //
544    //  fOld
545    //      The previous value of the flag, which we replaced during ctor,
546    //      and will replace during dtor.
547    //
548    //  fMgr
549    //      A pointer to the reader manager we are going to set/reset the
550    //      flag on.
551    // -----------------------------------------------------------------------
552    bool        fOld;
553    ReaderMgr*  fMgr;
554};
555
556XERCES_CPP_NAMESPACE_END
557
558#endif
Note: See TracBrowser for help on using the repository browser.