source: NonGTP/Xerces/xercesc/internal/XMLReader.hpp @ 188

Revision 188, 33.3 KB checked in by mattausch, 20 years ago (diff)

added xercesc to support

Line 
1/*
2 * The Apache Software License, Version 1.1
3 *
4 * Copyright (c) 1999-2001 The Apache Software Foundation.  All rights
5 * reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 *
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 *
14 * 2. Redistributions in binary form must reproduce the above copyright
15 *    notice, this list of conditions and the following disclaimer in
16 *    the documentation and/or other materials provided with the
17 *    distribution.
18 *
19 * 3. The end-user documentation included with the redistribution,
20 *    if any, must include the following acknowledgment:
21 *       "This product includes software developed by the
22 *        Apache Software Foundation (http://www.apache.org/)."
23 *    Alternately, this acknowledgment may appear in the software itself,
24 *    if and wherever such third-party acknowledgments normally appear.
25 *
26 * 4. The names "Xerces" and "Apache Software Foundation" must
27 *    not be used to endorse or promote products derived from this
28 *    software without prior written permission. For written
29 *    permission, please contact apache\@apache.org.
30 *
31 * 5. Products derived from this software may not be called "Apache",
32 *    nor may "Apache" appear in their name, without prior written
33 *    permission of the Apache Software Foundation.
34 *
35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
38 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
39 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
46 * SUCH DAMAGE.
47 * ====================================================================
48 *
49 * This software consists of voluntary contributions made by many
50 * individuals on behalf of the Apache Software Foundation, and was
51 * originally based on software copyright (c) 1999, International
52 * Business Machines, Inc., http://www.ibm.com .  For more information
53 * on the Apache Software Foundation, please see
54 * <http://www.apache.org/>.
55 */
56
57/*
58 * $Log: XMLReader.hpp,v $
59 * Revision 1.15  2004/01/29 11:46:30  cargilld
60 * Code cleanup changes to get rid of various compiler diagnostic messages.
61 *
62 * Revision 1.14  2003/05/16 21:36:58  knoaman
63 * Memory manager implementation: Modify constructors to pass in the memory manager.
64 *
65 * Revision 1.13  2003/05/15 18:26:29  knoaman
66 * Partial implementation of the configurable memory manager.
67 *
68 * Revision 1.12  2003/01/27 16:50:27  knoaman
69 * some cleanup.
70 *
71 * Revision 1.11  2002/12/20 22:09:56  tng
72 * XML 1.1
73 *
74 * Revision 1.10  2002/12/11 22:09:08  knoaman
75 * Performance: reduce instructions count.
76 *
77 * Revision 1.9  2002/12/03 15:31:19  knoaman
78 * Enable/disable calculation of src offset.
79 *
80 * Revision 1.8  2002/12/02 17:20:05  knoaman
81 * Remove unused data member.
82 *
83 * Revision 1.7  2002/11/28 19:19:12  knoaman
84 * Performance: remove unnecessary if condition.
85 *
86 * Revision 1.6  2002/11/28 18:17:22  knoaman
87 * Performance: make getNextChar/peekNextChar inline.
88 *
89 * Revision 1.5  2002/11/25 21:31:08  tng
90 * Performance:
91 * 1. use XMLRecognizer::Encodings enum to make new transcode, faster than comparing the encoding string every time.
92 * 2. Pre uppercase the encodingString before calling encodingForName to avoid calling compareIString
93 *
94 * Revision 1.4  2002/11/04 14:58:19  tng
95 * C++ Namespace Support.
96 *
97 * Revision 1.3  2002/09/27 12:56:23  tng
98 * [Bug 12740] Extra include.  By Peter Volchek.
99 *
100 * Revision 1.2  2002/05/27 18:42:14  tng
101 * To get ready for 64 bit large file, use XMLSSize_t to represent line and column number.
102 *
103 * Revision 1.1.1.1  2002/02/01 22:22:02  peiyongz
104 * sane_include
105 *
106 * Revision 1.18  2001/12/06 17:47:04  tng
107 * Performance Enhancement.  Modify the handling of the fNEL option so that it results in fgCharCharsTable being modified, instead of having all of the low-level routines check the option.  This seemed acceptable because the code appears to only permit the option to be turned on and not turned off again.   By Henry Zongaro.
108 *
109 * Revision 1.17  2001/07/12 18:50:13  tng
110 * Some performance modification regarding standalone check and xml decl check.
111 *
112 * Revision 1.16  2001/05/11 13:26:17  tng
113 * Copyright update.
114 *
115 * Revision 1.15  2001/05/03 18:42:51  knoaman
116 * Added new option to the parsers so that the NEL (0x85) char can be treated as a newline character.
117 *
118 * Revision 1.14  2001/01/25 19:16:58  tng
119 * const should be used instead of static const.  Fixed by Khaled Noaman.
120 *
121 * Revision 1.13  2000/07/25 22:33:05  aruna1
122 * Char definitions in XMLUni moved to XMLUniDefs
123 *
124 * Revision 1.12  2000/07/08 00:17:13  andyh
125 * Cleanup of yesterday's speedup changes.  Merged new bit into the
126 * scanner character properties table.
127 *
128 * Revision 1.11  2000/07/07 01:08:44  andyh
129 * Parser speed up in scan of XML content.
130 *
131 * Revision 1.10  2000/07/06 21:00:52  jpolast
132 * inlined getNextCharIfNot() for better performance
133 *
134 * Revision 1.9  2000/05/11 23:11:33  andyh
135 * Add missing validity checks for stand-alone documents, character range
136 * and Well-formed parsed entities.  Changes contributed by Sean MacRoibeaird
137 * <sean.Macroibeaird@ireland.sun.com>
138 *
139 * Revision 1.8  2000/03/02 19:54:29  roddey
140 * This checkin includes many changes done while waiting for the
141 * 1.1.0 code to be finished. I can't list them all here, but a list is
142 * available elsewhere.
143 *
144 * Revision 1.7  2000/02/24 20:18:07  abagchi
145 * Swat for removing Log from API docs
146 *
147 * Revision 1.6  2000/02/06 07:47:53  rahulj
148 * Year 2K copyright swat.
149 *
150 * Revision 1.5  2000/01/25 01:04:21  roddey
151 * Fixes a bogus error about ]]> in char data.
152 *
153 * Revision 1.4  2000/01/22 00:01:08  roddey
154 * Simple change to get rid of two hard coded 'x' type characters, which won't
155 * work on EBCDIC systems.
156 *
157 * Revision 1.3  1999/12/18 00:20:00  roddey
158 * More changes to support the new, completely orthagonal, support for
159 * intrinsic encodings.
160 *
161 * Revision 1.2  1999/12/15 19:48:03  roddey
162 * Changed to use new split of transcoder interfaces into XML transcoders and
163 * LCP transcoders, and implementation of intrinsic transcoders as pluggable
164 * transcoders, and addition of Latin1 intrinsic support.
165 *
166 * Revision 1.1.1.1  1999/11/09 01:08:22  twl
167 * Initial checkin
168 *
169 * Revision 1.3  1999/11/08 20:44:47  rahul
170 * Swat for adding in Product name and CVS comment log variable.
171 *
172 */
173
174#if !defined(XMLREADER_HPP)
175#define XMLREADER_HPP
176
177#include <xercesc/util/XMLChar.hpp>
178#include <xercesc/framework/XMLRecognizer.hpp>
179#include <xercesc/framework/XMLBuffer.hpp>
180
181XERCES_CPP_NAMESPACE_BEGIN
182
183class InputSource;
184class BinInputStream;
185class ReaderMgr;
186class XMLScanner;
187class XMLTranscoder;
188
189
190// ---------------------------------------------------------------------------
191//  Instances of this class are used to manage the content of entities. The
192//  scanner maintains a stack of these, one for each entity (this means entity
193//  in the sense of any parsed file or internal entity) currently being
194//  scanned. This class, given a binary input stream will handle reading in
195//  the data and decoding it from its external encoding into the internal
196//  Unicode format. Once internalized, this class provides the access
197//  methods to read in the data in various ways, maintains line and column
198//  information, and provides high performance character attribute checking
199//  methods.
200//
201//  This is NOT to be derived from.
202//
203// ---------------------------------------------------------------------------
204class XMLPARSER_EXPORT XMLReader : public XMemory
205{
206public:
207    // -----------------------------------------------------------------------
208    //  Public types
    //
    //  Types
    //      Whether this reader represents a PE (Type_PE) or not
    //      (Type_General). See fType.
    //
    //  Sources
    //      Whether the content being spooled has already been internalized
    //      (Source_Internal) or not (Source_External). See fSource.
    //
    //  RefFrom
    //      Whether the entity is being expanded inside a literal or not.
    //      See fRefFrom.
    //
    //  XMLVersion
    //      The XML version this reader conforms to; setXMLVersion() uses it
    //      to pick the character table.
209    // -----------------------------------------------------------------------
210    enum Types
211    {
212        Type_PE
213        , Type_General
214    };
215
216    enum Sources
217    {
218        Source_Internal
219        , Source_External
220    };
221
222    enum RefFrom
223    {
224        RefFrom_Literal
225        , RefFrom_NonLiteral
226    };
227
228    enum XMLVersion
229    {
230        XMLV1_0
231        , XMLV1_1
232        , XMLV_Unknown
233    };
234
235
236    // -----------------------------------------------------------------------
237    //  Public, query methods
    //
    //  The single-character predicates that are defined inline in this
    //  header test one flag bit of fgCharCharsTable[toCheck], so their
    //  answer depends on the table selected by setXMLVersion().
238    // -----------------------------------------------------------------------
239    bool isAllSpaces
240    (
241        const   XMLCh* const    toCheck
242        , const unsigned int    count
243    );
244
245    bool containsWhiteSpace
246    (
247        const   XMLCh* const    toCheck
248        , const unsigned int    count
249    );
250
251
252    bool isXMLLetter(const XMLCh toCheck);
253    bool isFirstNameChar(const XMLCh toCheck);
254    bool isNameChar(const XMLCh toCheck);
255    bool isPlainContentChar(const XMLCh toCheck);
256    bool isSpecialStartTagChar(const XMLCh toCheck);
257    bool isXMLChar(const XMLCh toCheck);
258    bool isWhitespace(const XMLCh toCheck);
259    bool isControlChar(const XMLCh toCheck);
260    bool isPublicIdChar(const XMLCh toCheck);
261
262    // -----------------------------------------------------------------------
263    //  Constructors and Destructor
    //
    //  The three constructors differ only in how the encoding is supplied:
    //  not at all, by name (encodingStr), or as an XMLRecognizer::Encodings
    //  value (encodingEnum).
    //
    //  NOTE(review): presumably the first form auto-senses the encoding and
    //  the other two force it (see fForcedEncoding), and the reader takes
    //  ownership of streamToAdopt -- confirm against XMLReader.cpp.
264    // -----------------------------------------------------------------------
265    XMLReader
266    (
267        const   XMLCh* const          pubId
268        , const XMLCh* const          sysId
269        ,       BinInputStream* const streamToAdopt
270        , const RefFrom               from
271        , const Types                 type
272        , const Sources               source
273        , const bool                  throwAtEnd = false
274        , const bool                  calculateSrcOfs = true
275        , const XMLVersion            xmlVersion = XMLV1_0
276        ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
277    );
278
279    XMLReader
280    (
281        const   XMLCh* const          pubId
282        , const XMLCh* const          sysId
283        ,       BinInputStream* const streamToAdopt
284        , const XMLCh* const          encodingStr
285        , const RefFrom               from
286        , const Types                 type
287        , const Sources               source
288        , const bool                  throwAtEnd = false
289        , const bool                  calculateSrcOfs = true
290        , const XMLVersion            xmlVersion = XMLV1_0
291        ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
292    );
293
294    XMLReader
295    (
296        const   XMLCh* const          pubId
297        , const XMLCh* const          sysId
298        ,       BinInputStream* const streamToAdopt
299        , XMLRecognizer::Encodings    encodingEnum
300        , const RefFrom               from
301        , const Types                 type
302        , const Sources               source
303        , const bool                  throwAtEnd = false
304        , const bool                  calculateSrcOfs = true
305        , const XMLVersion            xmlVersion = XMLV1_0
306        ,       MemoryManager* const  manager = XMLPlatformUtils::fgMemoryManager
307    );
308
309    ~XMLReader();
310
311
312    // -----------------------------------------------------------------------
313    //  Character buffer management methods
314    // -----------------------------------------------------------------------
315    unsigned long charsLeftInBuffer() const;
316    bool refreshCharBuffer();
317
318
319    // -----------------------------------------------------------------------
320    //  Scanning methods
321    // -----------------------------------------------------------------------
322    bool getName(XMLBuffer& toFill, const bool token);
323    bool getNextChar(XMLCh& chGotten);
324    bool getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten);
325    void movePlainContentChars(XMLBuffer &dest);
326    bool getSpaces(XMLBuffer& toFill);
327    bool getUpToCharOrWS(XMLBuffer& toFill, const XMLCh toCheck);
328    bool peekNextChar(XMLCh& chGotten);
329    bool skipIfQuote(XMLCh& chGotten);
330    bool skipSpaces(bool& skippedSomething);
331    bool skippedChar(const XMLCh toSkip);
332    bool skippedSpace();
333    bool skippedString(const XMLCh* const toSkip);
334    bool peekString(const XMLCh* const toPeek);
335
336
337    // -----------------------------------------------------------------------
338    //  Getter methods
339    // -----------------------------------------------------------------------
340    XMLSSize_t getColumnNumber() const;
341    const XMLCh* getEncodingStr() const;
342    XMLSSize_t getLineNumber() const;
343    bool getNoMoreFlag() const;
344    const XMLCh* getPublicId() const;
345    unsigned int getReaderNum() const;
346    RefFrom getRefFrom() const;
347    Sources getSource() const;
348    unsigned int getSrcOffset() const;
349    const XMLCh* getSystemId() const;
350    bool getThrowAtEnd() const;
351    Types getType() const;
352
353
354    // -----------------------------------------------------------------------
355    //  Setter methods
356    // -----------------------------------------------------------------------
357    bool setEncoding
358    (
359        const   XMLCh* const    newEncoding
360    );
361    void setReaderNum(const unsigned int newNum);
362    void setThrowAtEnd(const bool newValue);
363    void setXMLVersion(const XMLVersion version);
364
365
366private:
367    // -----------------------------------------------------------------------
368    //  Unimplemented constructors and operators
    //
    //  Declared private and left unimplemented so that a reader cannot be
    //  copied or assigned.
369    // -----------------------------------------------------------------------
370    XMLReader(const XMLReader&);
371    XMLReader& operator=(const XMLReader&);
372
373    // ---------------------------------------------------------------------------
374    //  Class Constants
375    //
376    //  kCharBufSize
377    //      The size of the character spool buffer that we use. It's not terribly
378    //      large because it's just getting filled with data from a raw byte
379    //      buffer as we go along. We don't want to decode all the text at
380    //      once before we find out that there is an error.
381    //
382    //      NOTE: This is a size in characters, not bytes.
383    //
384    //  kRawBufSize
385    //      The size of the raw buffer from which raw bytes are spooled out
386    //      as we transcode chunks of data. As it is emptied, it is filled back
387    //      in again from the source stream.
388    // ---------------------------------------------------------------------------
389    enum Constants
390    {
391        kCharBufSize        = 16 * 1024
392        , kRawBufSize       = 48 * 1024
393    };
394
395
396    // -----------------------------------------------------------------------
397    //  Private helper methods
398    // -----------------------------------------------------------------------
399    void checkForSwapped();
400
401    void doInitCharSizeChecks();
402
403    void doInitDecode();
404
405    XMLByte getNextRawByte
406    (
407        const   bool            eoiOk
408    );
409
410    void refreshRawBuffer();
411
412    void setTranscoder
413    (
414        const   XMLCh* const    newEncoding
415    );
416
417    unsigned int xcodeMoreChars
418    (
419                XMLCh* const            bufToFill
420        ,       unsigned char* const    charSizes
421        , const unsigned int            maxChars
422    );
423
424
425    // -----------------------------------------------------------------------
426    //  Data members
427    //
428    //  fCharIndex
429    //      The index into the character buffer. When this hits fCharsAvail
430    //      then it's time to refill.
431    //
432    //  fCharBuf
433    //      A buffer that the reader manager fills up with transcoded
434    //      characters a small amount at a time.
435    //
436    //  fCharsAvail
437    //      The characters currently available in the character buffer.
438    //
439    //  fCharSizeBuf
440    //      This buffer is an array that contains the number of source chars
441    //      eaten to create each char in the fCharBuf buffer. So the entry
442    //      fCharSizeBuf[x] is the number of source chars that were eaten
443    //      to make the internalized char fCharBuf[x]. This only contains
444    //      useful data if fSrcOfsSupported is true.
445    //
446    //  fCurCol
447    //  fCurLine
448    //      The current line and column that we are in within this reader's
449    //      text.
450    //
451    //  fEncoding
452    //      This is the rough encoding setting. This enum is set during
453    //      construction and just tells us the rough family of encoding that
454    //      we are doing.
455    //
456    //  fEncodingStr
457    //      This is the name of the encoding we are using. It will be
458    //      provisionally set during construction, from the auto-sensed
459    //      encoding. But it might be overridden when the XMLDecl is finally
460    //      seen by the scanner. It can also be forced to a particular
461    //      encoding, in which case fForcedEncoding is set.
462    //
463    //  fForcedEncoding
464    //      If the encoding is forced then this is set and all other
465    //      information will be ignored. This encoding will be taken as
466    //      gospel. This is done by calling an alternate constructor.
467    //
468    //  fNoMore
469    //      This is set when the source text is exhausted. It lets us know
470    //      quickly that no more text is available.
471    //
472    //  fRawBufIndex
473    //      The current index into the raw byte buffer. When it's equal to
474    //      fRawBytesAvail then we need to read another buffer.
475    //
476    //  fRawByteBuf
477    //      This is the raw byte buffer that is used to spool out bytes
478    //      from into the fCharBuf buffer, as we transcode in blocks.
479    //
480    //  fRawBytesAvail
481    //      The number of bytes currently available in the raw buffer. This
482    //      helps deal with the last buffer's worth, which will usually not
483    //      be a full one.
484    //
485    //  fReaderNum
486    //      Each reader from a particular reader manager (which means from a
487    //      particular document) is given a unique number. The reader manager
488    //      sets these numbers. They are used to catch things like partial
489    //      markup errors.
490    //
491    //  fRefFrom
492    //      This flag is provided in the ctor, and tells us if we represent
493    //      some entity being expanded inside a literal. Sometimes things
494    //      happen differently inside and outside literals.
495    //
496    //  fPublicId
497    //  fSystemId
498    //      These are the system and public ids of the source that this
499    //      reader is reading.
500    //
501    //  fSentTrailingSpace
502    //      If we are a PE entity being read and we are not referenced from a
503    //      literal, then a leading and trailing space must be faked into the
504    //      data. This lets us know we've done the trailing space already (so
505    //      we don't just keep doing it again and again.)
506    //
507    //  fSource
508    //      Indicates whether the content this reader is spooling has already
509    //      been internalized. This will prevent multiple processing of
510    //      whitespace when an already internalized entity is being spooled
511    //      out.
512    //
513    //  fSpareChar
514    //      Some encodings can create two chars in an atomic way, e.g.
515    //      surrogate pairs. We might not be able to store both, so we store
516    //      it here until the next buffer transcoding operation.
    //      NOTE(review): no fSpareChar member is declared below; this entry
    //      looks stale -- confirm and remove.
517    //
518    //  fSrcOfsBase
519    //      This is the base offset within the source of this entity. Values
520    //      in the current fCharSizeBuf array are relative to this value.
521    //
522    //  fSrcOfsSupported
523    //      This flag is set to indicate whether source byte offset info
524    //      is supported. For intrinsic encodings, it's always set since we
525    //      can always support it. For transcoder based encodings, we ask
526    //      the transcoder if it supports it or not.
527    //
    //  fCalculateSrcOfs
    //      NOTE(review): undocumented in the original. Presumably holds the
    //      calculateSrcOfs constructor argument and enables/disables the
    //      source offset calculation -- confirm against XMLReader.cpp.
    //
528    //  fStream
529    //      This is the input stream that provides the data for the reader.
530    //      It's always treated as a raw byte stream. The derived class will
531    //      ask for buffers of text from it and will handle making some
532    //      sense of it.
533    //
534    //  fSwapped
535    //      If the encoding is one of the ones we do intrinsically, and it's
536    //      in a different byte order from our native order, then this is
537    //      set to remind us to byte swap it during transcoding.
538    //
539    //  fThrowAtEnd
540    //      Indicates whether the reader manager should throw an end of entity
541    //      exception at the end of this reader instance. This is usually
542    //      set for top level external entity references. It overrides the
543    //      reader manager's global flag that controls throwing at the end
544    //      of entities. Defaults to false.
545    //
546    //  fTranscoder
547    //      If the encoding is not one that we handle intrinsically, then
548    //      we use an external transcoder to do it. This class is an
549    //      abstraction that allows us to use pluggable external transcoding
550    //      services (via XMLTransService in util.)
551    //
552    //  fType
553    //      Indicates whether this reader represents a PE or not. If this
554    //      is a PE and it is not referenced from a literal (see fRefFrom;
555    //      there is no fInLiteral member), then we will put out an extra
    //      space at the end.
556    //
557    //  fgCharCharsTable
558    //      Pointer to XMLChar table, depends on XML version
559    //
560    //  fNEL
561    //      Boolean indicates if NEL and LSEP should be recognized as line
    //      breaks (they are then normalized to LF like CR and LF are)
562    //
563    //  fXMLVersion
564    //      Enum to indicate if this Reader is conforming to XML 1.0 or XML 1.1
    //
    //  fMemoryManager
    //      NOTE(review): undocumented in the original. Presumably the
    //      MemoryManager passed to the constructor -- confirm against
    //      XMLReader.cpp.
565    // -----------------------------------------------------------------------
566    unsigned int                fCharIndex;
567    XMLCh                       fCharBuf[kCharBufSize];
568    unsigned int                fCharsAvail;
569    unsigned char               fCharSizeBuf[kCharBufSize];
570    XMLSSize_t                  fCurCol;
571    XMLSSize_t                  fCurLine;
572    XMLRecognizer::Encodings    fEncoding;
573    XMLCh*                      fEncodingStr;
574    bool                        fForcedEncoding;
575    bool                        fNoMore;
576    XMLCh*                      fPublicId;
577    unsigned int                fRawBufIndex;
578    XMLByte                     fRawByteBuf[kRawBufSize];
579    unsigned int                fRawBytesAvail;
580    unsigned int                fReaderNum;
581    RefFrom                     fRefFrom;
582    bool                        fSentTrailingSpace;
583    Sources                     fSource;
584    unsigned int                fSrcOfsBase;
585    bool                        fSrcOfsSupported;
586    bool                        fCalculateSrcOfs;
587    XMLCh*                      fSystemId;
588    BinInputStream*             fStream;
589    bool                        fSwapped;
590    bool                        fThrowAtEnd;
591    XMLTranscoder*              fTranscoder;
592    Types                       fType;
593    XMLByte*                    fgCharCharsTable;
594    bool                        fNEL;
595    XMLVersion                  fXMLVersion;
596    MemoryManager*              fMemoryManager;
597};
598
599
600// ---------------------------------------------------------------------------
601//  XMLReader: Public, query methods
602// ---------------------------------------------------------------------------
603inline bool XMLReader::isNameChar(const XMLCh toCheck)
604{
605    return ((fgCharCharsTable[toCheck] & gNameCharMask) != 0);
606}
607
608inline bool XMLReader::isPlainContentChar(const XMLCh toCheck)
609{
610    return ((fgCharCharsTable[toCheck] & gPlainContentCharMask) != 0);
611}
612
613
614inline bool XMLReader::isFirstNameChar(const XMLCh toCheck)
615{
616    return ((fgCharCharsTable[toCheck] & gFirstNameCharMask) != 0);
617}
618
619inline bool XMLReader::isSpecialStartTagChar(const XMLCh toCheck)
620{
621    return ((fgCharCharsTable[toCheck] & gSpecialStartTagCharMask) != 0);
622}
623
624inline bool XMLReader::isXMLChar(const XMLCh toCheck)
625{
626    return ((fgCharCharsTable[toCheck] & gXMLCharMask) != 0);
627}
628
629inline bool XMLReader::isXMLLetter(const XMLCh toCheck)
630{
631    return ((fgCharCharsTable[toCheck] & gLetterCharMask) != 0);
632}
633
634inline bool XMLReader::isWhitespace(const XMLCh toCheck)
635{
636    return ((fgCharCharsTable[toCheck] & gWhitespaceCharMask) != 0);
637}
638
639inline bool XMLReader::isControlChar(const XMLCh toCheck)
640{
641    return ((fgCharCharsTable[toCheck] & gControlCharMask) != 0);
642}
643
644// ---------------------------------------------------------------------------
645//  XMLReader: Buffer management methods
646// ---------------------------------------------------------------------------
647inline unsigned long XMLReader::charsLeftInBuffer() const
648{
649    return fCharsAvail - fCharIndex;
650}
651
652
653// ---------------------------------------------------------------------------
654//  XMLReader: Getter methods
655// ---------------------------------------------------------------------------
656inline XMLSSize_t XMLReader::getColumnNumber() const
657{
658    return fCurCol;
659}
660
661inline const XMLCh* XMLReader::getEncodingStr() const
662{
663    return fEncodingStr;
664}
665
666inline XMLSSize_t XMLReader::getLineNumber() const
667{
668    return fCurLine;
669}
670
671inline bool XMLReader::getNoMoreFlag() const
672{
673    return fNoMore;
674}
675
676inline const XMLCh* XMLReader::getPublicId() const
677{
678    return fPublicId;
679}
680
681inline unsigned int XMLReader::getReaderNum() const
682{
683    return fReaderNum;
684}
685
686inline XMLReader::RefFrom XMLReader::getRefFrom() const
687{
688    return fRefFrom;
689}
690
691inline XMLReader::Sources XMLReader::getSource() const
692{
693    return fSource;
694}
695
696inline const XMLCh* XMLReader::getSystemId() const
697{
698    return fSystemId;
699}
700
701inline bool XMLReader::getThrowAtEnd() const
702{
703    return fThrowAtEnd;
704}
705
706inline XMLReader::Types XMLReader::getType() const
707{
708    return fType;
709}
710
711// ---------------------------------------------------------------------------
712//  XMLReader: Setter methods
713// ---------------------------------------------------------------------------
714inline void XMLReader::setReaderNum(const unsigned int newNum)
715{
716    fReaderNum = newNum;
717}
718
719inline void XMLReader::setThrowAtEnd(const bool newValue)
720{
721    fThrowAtEnd = newValue;
722}
723
724inline void XMLReader::setXMLVersion(const XMLVersion version)
725{
726    fXMLVersion = version;
727    if (version == XMLV1_1) {
728        fNEL = true;
729        fgCharCharsTable = XMLChar1_1::fgCharCharsTable1_1;
730    }
731    else {
732        fNEL = XMLChar1_0::enableNEL;
733        fgCharCharsTable = XMLChar1_0::fgCharCharsTable1_0;
734    }
735
736}
737
738
739
740// ---------------------------------------------------------------------------
741//
742//  XMLReader: movePlainContentChars()
743//
744//       Move as many plain (no special handling of any sort required) content
745//       characters as possible from this reader to the supplied destination buffer.
746//
747//       This is THE hottest performance spot in the parser.
748//
749// ---------------------------------------------------------------------------
750inline void XMLReader::movePlainContentChars(XMLBuffer &dest)
751{
752    unsigned int count = fCharIndex;
753
754    while (fCharIndex < fCharsAvail)
755    {
756        if (!isPlainContentChar(fCharBuf[fCharIndex]))
757            break;
758        fCharIndex++;
759    }
760
761    if (count != fCharIndex)
762    {
763        fCurCol    += (fCharIndex - count);
764        dest.append(&fCharBuf[count], fCharIndex - count);
765    }
766}
767
768
769// ---------------------------------------------------------------------------
770//  XMLReader: getNextCharIfNot() method inlined for speed
771// ---------------------------------------------------------------------------
772inline bool XMLReader::getNextCharIfNot(const XMLCh chNotToGet, XMLCh& chGotten)
773{
774    //
775    //  See if there is at least a char in the buffer. Else, do the buffer
776    //  reload logic.
777    //
778    if (fCharIndex >= fCharsAvail)
779    {
780        // If fNoMore is set, then we have nothing else to give
781        if (fNoMore)
782            return false;
783
784        // Try to refresh
785        if (!refreshCharBuffer())
786            return false;
787    }
788
789    // Check the next char
790    if (fCharBuf[fCharIndex] == chNotToGet)
791        return false;
792
793    // Its not the one we want to skip so bump the index
794    chGotten = fCharBuf[fCharIndex++];
795
796    // Handle end of line normalization and line/col member maintenance.
797    if (chGotten == chCR)
798    {
799        //
800        //  Do the normalization. We return chLF regardless of which was
801        //  found. We also eat a chCR followed by an chLF.
802        //
803        //  We only do this if the content being spooled is not already
804        //  internalized.
805        //
806        if (fSource == Source_External)
807        {
808            //
809            //  See if we have another char left. If not, don't bother.
810            //  Else, see if its an chLF to eat. If it is, bump the
811            //  index again.
812            //
813            if (fCharIndex < fCharsAvail)
814            {
815                if (fCharBuf[fCharIndex] == chLF
816                    || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
817                    fCharIndex++;
818            }
819             else
820            {
821                if (refreshCharBuffer())
822                {
823                    if (fCharBuf[fCharIndex] == chLF
824                        || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
825                        fCharIndex++;
826                }
827            }
828
829            // And return just an chLF
830            chGotten = chLF;
831        }
832
833        // And handle the line/col stuff
834        fCurCol = 1;
835        fCurLine++;
836    }
837     else if (chGotten == chLF
838              || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
839    {
840        chGotten = chLF;
841        fCurLine++;
842        fCurCol = 1;
843    }
844     else if (chGotten)
845    {
846        //
847        //  Only do this is not a null char. Null chars are not part of the
848        //  real content. They are just marker characters inserted into
849        //  the stream.
850        //
851        fCurCol++;
852    }
853    return true;
854}
855
856// ---------------------------------------------------------------------------
857//  XMLReader: getNextChar() method inlined for speed
858// ---------------------------------------------------------------------------
859inline bool XMLReader::getNextChar(XMLCh& chGotten)
860{
861    //
862    //  See if there is at least a char in the buffer. Else, do the buffer
863    //  reload logic.
864    //
865    if (fCharIndex >= fCharsAvail)
866    {
867        // If fNoMore is set, then we have nothing else to give
868        if (fNoMore)
869            return false;
870
871        // Try to refresh
872        if (!refreshCharBuffer())
873            return false;
874    }
875
876    chGotten = fCharBuf[fCharIndex++];
877
878    // Handle end of line normalization and line/col member maintenance.
879    if (chGotten == chCR)
880    {
881        //
882        //  Do the normalization. We return chLF regardless of which was
883        //  found. We also eat a chCR followed by an chLF.
884        //
885        //  We only do this if the content being spooled is not already
886        //  internalized.
887        //
888        if (fSource == Source_External)
889        {
890            //
891            //  See if we have another char left. If not, don't bother.
892            //  Else, see if its an chLF to eat. If it is, bump the
893            //  index again.
894            //
895            if ((fCharIndex < fCharsAvail) || refreshCharBuffer())
896            {
897                if (fCharBuf[fCharIndex] == chLF
898                    || ((fCharBuf[fCharIndex] == chNEL) && fNEL))
899                    fCharIndex++;
900            }
901
902            // And return just an chLF
903            chGotten = chLF;
904        }
905
906        // And handle the line/col stuff
907        fCurCol = 1;
908        fCurLine++;
909    }
910     else if (chGotten == chLF
911              || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
912    {
913        chGotten = chLF;
914        fCurLine++;
915        fCurCol = 1;
916    }
917     else if (chGotten)
918    {
919        //
920        //  Only do this is not a null char. Null chars are not part of the
921        //  real content. They are just marker characters inserted into
922        //  the stream.
923        //
924        fCurCol++;
925    }
926    return true;
927}
928
929
930// ---------------------------------------------------------------------------
931//  XMLReader: peekNextChar() method inlined for speed
932// ---------------------------------------------------------------------------
933inline bool XMLReader::peekNextChar(XMLCh& chGotten)
934{
935    //
936    //  If there is something still in the buffer, get it. Else do the reload
937    //  scenario.
938    //
939    if (fCharIndex >= fCharsAvail)
940    {
941        // Try to refresh the buffer
942        if (!refreshCharBuffer())
943        {
944            chGotten = chNull;
945            return false;
946        }
947    }
948
949    chGotten = fCharBuf[fCharIndex];
950
951    //
952    //  Even though we are only peeking, we have to act the same as the
953    //  normal char get method in regards to newline normalization, though
954    //  its not as complicated as the actual character getting method's.
955    //
956    if ((chGotten == chCR || ((chGotten == chNEL || chGotten == chLineSeparator) && fNEL))
957        && (fSource == Source_External))
958        chGotten = chLF;
959
960    return true;
961}
962
963
964XERCES_CPP_NAMESPACE_END
965
966#endif
Note: See TracBrowser for help on using the repository browser.