source: NonGTP/Xerces/xerces/include/xercesc/util/XMLUri.hpp @ 358

Revision 358, 25.0 KB checked in by bittner, 19 years ago (diff)

xerces added

Line 
1/*
2 * Copyright 2001-2004 The Apache Software Foundation.
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 *      http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17/*
18 * $Id: XMLUri.hpp,v 1.18 2004/09/08 13:56:25 peiyongz Exp $
19 * $Log: XMLUri.hpp,v $
20 * Revision 1.18  2004/09/08 13:56:25  peiyongz
21 * Apache License Version 2.0
22 *
23 * Revision 1.17  2004/05/25 18:11:47  peiyongz
24 * normalizeURI() added
25 *
26 * Revision 1.16  2004/01/12 22:01:02  cargilld
27 * Minor performance change for handling reserved and unreserved characters.
28 *
29 * Revision 1.15  2003/12/17 00:18:35  cargilld
30 * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data.
31 *
32 * Revision 1.14  2003/12/11 22:21:25  neilg
33 * fixes for the URI implementation to take registry names into account; much thanks to Michael Glavassevich
34 *
35 * Revision 1.13  2003/12/02 17:50:21  neilg
36 * additional fix for bug 25118; once again, thanks to Jeroen Whitmond
37 *
38 * Revision 1.12  2003/10/01 00:20:41  knoaman
39 * Add a static method to check whether a given string is a valid URI.
40 *
41 * Revision 1.11  2003/09/25 22:23:25  peiyongz
42 * Implementation of Serialization/Deserialization
43 *
44 * Revision 1.10  2003/07/25 10:15:16  gareth
45 * Patch by Michael Glavassevich
46 *
47 * The patch fixes Bugzilla #19787, #20006, #20009, #20010 and #20287, and
48 * several other issues. A summary of the changes is listed below:
49 *
50 * 1. Added '[' and ']' to reserved characters as per RFC 2732.
51 * 2. '[' and ']' added in RFC 2732, are not allowed in path segments, but
52 * may appear in the opaque part.
53 * 3. No URI can begin with a ':'.
54 * 4. URI has no scheme if ':' occurs in a URI after '?' or '#', it's part of
55 * the query string or fragment.
56 * 5. Whitespace (even escaped as %20) is not permitted in the authority
57 * portion of a URI.
58 * 6. IPv4 addresses must match 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "."
59 * 1*3DIGIT. Since RFC 2732.
60 * 7. IPv4 addresses are 32-bit, therefore no segment may be larger than 255.
61 * This isn't expressed by the grammar.
62 * 8. Hostnames cannot end with a '-'.
63 * 9. Labels in a hostname must be 63 bytes or less [RFC 1034].
64 * 10. Hostnames may be no longer than 255 bytes [RFC 1034]. (That
65 * restriction was already there. I just moved it inwards.)
66 * 11. Added support for IPv6 references added in RFC 2732. URIs such as
67 * http://[::ffff:1.2.3.4] are valid. The BNF in RFC 2373 isn't correct. IPv6
68 * addresses are read according to section 2.2 of RFC 2373.
69 *
70 * Revision 1.9  2003/05/16 06:01:53  knoaman
71 * Partial implementation of the configurable memory manager.
72 *
73 * Revision 1.8  2003/05/15 19:07:46  knoaman
74 * Partial implementation of the configurable memory manager.
75 *
76 * Revision 1.7  2003/01/06 19:43:18  tng
77 * New feature StandardUriConformant to force strict standard uri conformance.
78 *
79 * Revision 1.6  2002/11/21 15:42:39  gareth
80 * Implemented copy constructor and operator =. Patch by Jennifer Schachter.
81 *
82 * Revision 1.5  2002/11/04 15:22:05  tng
83 * C++ Namespace Support.
84 *
85 * Revision 1.4  2002/09/23 18:41:00  tng
86 * DOM L3: Support baseURI.   Add fURIText to XMLUri.   Added by Gareth Reakes and Thomas Ford.
87 *
88 * Revision 1.3  2002/08/23 20:45:24  tng
89 * .Memory leak fix: XMLUri data not deleted if constructor failed.
90 *
91 * Revision 1.2  2002/02/20 18:17:02  tng
92 * [Bug 5977] Warnings on generating apiDocs.
93 *
94 * Revision 1.1.1.1  2002/02/01 22:22:17  peiyongz
95 * sane_include
96 *
97 * Revision 1.3  2001/08/29 19:03:03  peiyongz
98 * Bugzilla# 2816: on AIX 4.2, xlC 3 rev.1, compilation error on inline method
99 *
100 * Revision 1.2  2001/08/16 14:09:44  peiyongz
101 * Removed unused ctors and methods
102 *
103 * Revision 1.1  2001/08/10 16:23:41  peiyongz
104 * XMLUri: creation
105 *
106 *
107 */
108
109#if !defined(XMLURI_HPP)
110#define XMLURI_HPP
111
112#include <xercesc/util/XMemory.hpp>
113#include <xercesc/util/XMLString.hpp>
114
115#include <xercesc/internal/XSerializable.hpp>
116#include <xercesc/framework/XMLBuffer.hpp>
117
118XERCES_CPP_NAMESPACE_BEGIN
119
120/*
121 * This class is a direct port of Java's URI class. To distinguish
122 * it from XMLURL, we use the name XMLUri instead of
123 * XMLURI.
124 *
125 * TODO: how to relate XMLUri and XMLURL since URL is part of URI.
126 *
127 */
128
129class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
130{
131public:
132
133    // -----------------------------------------------------------------------
134    //  Constructors and Destructor
135    // -----------------------------------------------------------------------
136
137    /**
138     * Construct a new URI from a URI specification string.
139     *
140     * If the specification follows the "generic URI" syntax, (two slashes
141     * following the first colon), the specification will be parsed
142     * accordingly - setting the
143     *                           scheme,
144     *                           userinfo,
145     *                           host,
146     *                           port,
147     *                           path,
148     *                           querystring and
149     *                           fragment
150     * fields as necessary.
151     *
152     * If the specification does not follow the "generic URI" syntax,
153     * the specification is parsed into a
154     *                           scheme and
155     *                           scheme-specific part (stored as the path) only.
156     *
157     * @param uriSpec the URI specification string (cannot be null or empty)
158     *
159     * @param manager Pointer to the memory manager to be used to
160     *                allocate objects.
161     *
162     * ctor# 2
163     *
164     */
165    XMLUri(const XMLCh* const    uriSpec,
166           MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
167
168    /**
169     * Construct a new URI from a base URI and a URI specification string.
170     * The URI specification string may be a relative URI.
171     *
172     * @param baseURI the base URI (cannot be null if uriSpec is null or
173     *                empty)
174     *
175     * @param uriSpec the URI specification string (cannot be null or
176     *                empty if base is null)
177     *
178     * @param manager Pointer to the memory manager to be used to
179     *                allocate objects.
180     *
181     * ctor# 7 relative ctor
182     *
183     */
184    XMLUri(const XMLUri* const  baseURI
185         , const XMLCh* const   uriSpec
186         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
187
188    /**
189     * Copy constructor
190     */
191    XMLUri(const XMLUri& toCopy);
192    XMLUri& operator=(const XMLUri& toAssign);
193
194    virtual ~XMLUri();
195
196    // -----------------------------------------------------------------------
197    //  Getter methods
198    // -----------------------------------------------------------------------
199    /**
200     * Get the URI as a string specification. See RFC 2396 Section 5.2.
201     *
202     * @return the URI string specification
203     */
204    const XMLCh* getUriText() const;
205
206    /**
207     * Get the scheme for this URI.
208     *
209     * @return the scheme for this URI
210     */
211     const XMLCh* getScheme() const;
212
213    /**
214     * Get the userinfo for this URI.
215     *
216     * @return the userinfo for this URI (null if not specified).
217     */
218     const XMLCh* getUserInfo() const;
219
220
221    /**
222     * Get the host for this URI.
223     *
224     * @return the host for this URI (null if not specified).
225     */
226     const XMLCh* getHost() const;
227
228    /**
229     * Get the port for this URI.
230     *
231     * @return the port for this URI (-1 if not specified).
232     */
233     int getPort() const;
234     
235    /**
236     * Get the registry based authority for this URI.
237     *
238     * @return the registry based authority (null if not specified).
239     */
240     const XMLCh* getRegBasedAuthority() const;
241
242    /**
243     * Get the path for this URI. Note that the value returned is the path
244     * only and does not include the query string or fragment.
245     *
246     * @return the path for this URI.
247     */
248     const XMLCh* getPath() const;
249
250    /**
251     * Get the query string for this URI.
252     *
253     * @return the query string for this URI. Null is returned if there
254     *         was no "?" in the URI spec, empty string if there was a
255     *         "?" but no query string following it.
256     */
257     const XMLCh* getQueryString() const;
258
259    /**
260     * Get the fragment for this URI.
261     *
262     * @return the fragment for this URI. Null is returned if there
263     *         was no "#" in the URI spec, empty string if there was a
264     *         "#" but no fragment following it.
265     */
266     const XMLCh* getFragment() const;
267
268    // -----------------------------------------------------------------------
269    //  Setter methods
270    // -----------------------------------------------------------------------
271
272    /**
273     * Set the scheme for this URI. The scheme is converted to lowercase
274     * before it is set.
275     *
276     * @param newScheme the scheme for this URI (cannot be null)
277     *
278     */
279     void setScheme(const XMLCh* const newScheme);
280
281    /**
282     * Set the userinfo for this URI. If a non-null value is passed in and
283     * the host value is null, then an exception is thrown.
284     *
285     * @param newUserInfo the userinfo for this URI
286     *
287     */
288     void setUserInfo(const XMLCh* const newUserInfo);
289
290    /**
291     * Set the host for this URI. If null is passed in, the userinfo
292     * field is also set to null and the port is set to -1.
293     *
294     * Note: This method overwrites registry based authority if it
295     * previously existed in this URI.
296     *
297     * @param newHost the host for this URI
298     *
299     */
300     void setHost(const XMLCh* const newHost);
301
302    /**
303     * Set the port for this URI. -1 is used to indicate that the port is
304     * not specified, otherwise valid port numbers are  between 0 and 65535.
305     * If a valid port number is passed in and the host field is null,
306     * an exception is thrown.
307     *
308     * @param newPort the port number for this URI
309     *
310     */
311     void setPort(int newPort);
312     
313    /**
314     * Sets the registry based authority for this URI.
315     *
316     * Note: This method overwrites server based authority
317     * if it previously existed in this URI.
318     *
319     * @param newRegAuth the registry based authority for this URI
320     */
321     void setRegBasedAuthority(const XMLCh* const newRegAuth);
322
323    /**
324     * Set the path for this URI.
325     *
326     * If the supplied path is null, then the
327     * query string and fragment are set to null as well.
328     *
329     * If the supplied path includes a query string and/or fragment,
330     * these fields will be parsed and set as well.
331     *
332     * Note:
333     *
334     * For URIs following the "generic URI" syntax, the path
335     * specified should start with a slash.
336     *
337     * For URIs that do not follow the generic URI syntax, this method
338     * sets the scheme-specific part.
339     *
340     * @param newPath the path for this URI (may be null)
341     *
342     */
343     void setPath(const XMLCh* const newPath);
344
345    /**
346     * Set the query string for this URI. A non-null value is valid only
347     * if this is an URI conforming to the generic URI syntax and
348     * the path value is not null.
349     *
350     * @param newQueryString the query string for this URI
351     *
352     */
353     void setQueryString(const XMLCh* const newQueryString);
354
355    /**
356     * Set the fragment for this URI. A non-null value is valid only
357     * if this is a URI conforming to the generic URI syntax and
358     * the path value is not null.
359     *
360     * @param newFragment the fragment for this URI
361     *
362     */
363     void setFragment(const XMLCh* const newFragment);
364
365     // -----------------------------------------------------------------------
366    //  Miscellaneous methods
367    // -----------------------------------------------------------------------
368
369    /**
370     * Determine whether a given string contains only URI characters (also
371     * called "uric" in RFC 2396). uric consist of all reserved
372     * characters, unreserved characters and escaped characters.
373     *
374     * @return true if the string is comprised of uric, false otherwise
375     */
376    static bool isURIString(const XMLCh* const uric);
377
378    /**
379     * Determine whether a given string is a valid URI
380     */
381    static bool isValidURI( const XMLUri* const baseURI
382                          , const XMLCh* const uriStr);
383    /**
384     * Determine whether a given string is a valid URI
385     */
386    static bool isValidURI( bool haveBaseURI
387                          , const XMLCh* const uriStr);
388
389
390    static void normalizeURI(const XMLCh*     const systemURI,
391                                   XMLBuffer&       normalizedURI);
392
393    /***
394     * Support for Serialization/De-serialization
395     ***/
396    DECL_XSERIALIZABLE(XMLUri)
397
398    XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
399
400private:
401
402    static const XMLCh MARK_OR_RESERVED_CHARACTERS[];
403    static const XMLCh RESERVED_CHARACTERS[];
404    static const XMLCh MARK_CHARACTERS[];
405    static const XMLCh SCHEME_CHARACTERS[];
406    static const XMLCh USERINFO_CHARACTERS[];
407    static const XMLCh REG_NAME_CHARACTERS[];
408    static const XMLCh PATH_CHARACTERS[];
409
410    //helper method for getUriText
411    void buildFullText();
412
413    // -----------------------------------------------------------------------
414    //  Private helper methods
415    // -----------------------------------------------------------------------
416
417    /**
418     * Determine whether a character is a reserved character:
419     *
420     * @return true if the string contains any reserved characters
421     */
422    static bool isReservedCharacter(const XMLCh theChar);
423   
424    /**
425     * Determine whether a character is a path character:
426     *
427     * @return true if the character is path character
428     */
429    static bool isPathCharacter(const XMLCh theChar);
430
431    /**
432     * Determine whether a char is an unreserved character.
433     *
434     * @return true if the char is unreserved, false otherwise
435     */
436    static bool isUnreservedCharacter(const XMLCh theChar);
437
438    /**
439     * Determine whether a char is an reserved or unreserved character.
440     *
441     * @return true if the char is reserved or unreserved, false otherwise
442     */               
443    static bool isReservedOrUnreservedCharacter(const XMLCh theChar);
444
445    /**
446     * Determine whether a scheme conforms to the rules for a scheme name.
447     * A scheme is conformant if it starts with an alphanumeric, and
448     * contains only alphanumerics, '+','-' and '.'.
449     *
450     * @return true if the scheme is conformant, false otherwise
451     */
452    static bool isConformantSchemeName(const XMLCh* const scheme);
453
454    /**
455     * Determine whether a userInfo conforms to the rules for a userinfo.
456     *
457     * @return true if the scheme is conformant, false otherwise
458     */
459    static void isConformantUserInfo(const XMLCh* const userInfo
460        , MemoryManager* const manager);
461   
462    /**
463     * Determines whether the components host, port, and user info
464     * are valid as a server authority.
465     *
466     * @return true if the given host, port, and userinfo compose
467     * a valid server authority
468     */
469    static bool isValidServerBasedAuthority(const XMLCh* const host
470                                           , const int hostLen
471                                           , const int port
472                                           , const XMLCh* const userinfo
473                                           , const int userLen);
474                                           
475    /**
476     * Determines whether the components host, port, and user info
477     * are valid as a server authority.
478     *
479     * @return true if the given host, port, and userinfo compose
480     * a valid server authority
481     */
482    static bool isValidServerBasedAuthority(const XMLCh* const host
483                                           , const int port
484                                           , const XMLCh* const userinfo
485                                           , MemoryManager* const manager);
486     
487   /**
488    * Determines whether the given string is a registry based authority.
489    *
490    * @param authority the authority component of a URI
491    *
492    * @return true if the given string is a registry based authority
493    */
494    static bool isValidRegistryBasedAuthority(const XMLCh* const authority
495                                             , const int authLen);
496
497   /**
498    * Determines whether the given string is a registry based authority.
499    *
500    * @param authority the authority component of a URI
501    *
502    * @return true if the given string is a registry based authority
503    */
504    static bool isValidRegistryBasedAuthority(const XMLCh* const authority);
505
506    /**
507     * Determine whether a string is syntactically capable of representing
508     * a valid IPv4 address, IPv6 reference or the domain name of a network host.
509     *
510     * A valid IPv4 address consists of four decimal digit groups
511     * separated by a '.'.
512     *
513     * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the
514     * definition of IPv6 references.
515     *
516     * A hostname consists of domain labels (each of which must begin and
517     * end with an alphanumeric but may contain '-') separated by a '.'.
518     * See RFC 2396 Section 3.2.2.
519     *
520     * @return true if the string is a syntactically valid IPv4 address
521     *              or hostname
522     */
523     static bool isWellFormedAddress(const XMLCh* const addr
524         , MemoryManager* const manager);
525     
526    /**
527     * Determines whether a string is an IPv4 address as defined by
528     * RFC 2373, and under the further constraint that it must be a 32-bit
529     * address. Though not expressed in the grammar, in order to satisfy
530     * the 32-bit address constraint, each segment of the address cannot
531     * be greater than 255 (8 bits of information).
532     *
533     * @return true if the string is a syntactically valid IPv4 address
534     */
535     static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length);
536     
537    /**
538     * Determines whether a string is an IPv6 reference as defined
539     * by RFC 2732, where IPv6address is defined in RFC 2373. The
540     * IPv6 address is parsed according to Section 2.2 of RFC 2373,
541     * with the additional constraint that the address be composed of
542     * 128 bits of information.
543     *
544     * Note: The BNF expressed in RFC 2373 Appendix B does not
545     * accurately describe section 2.2, and was in fact removed from
546     * RFC 3513, the successor of RFC 2373.
547     *
548     * @return true if the string is a syntactically valid IPv6 reference
549     */
550     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length);
551     
552    /**
553     * Helper function for isWellFormedIPv6Reference which scans the
554     * hex sequences of an IPv6 address. It returns the index of the
555     * next character to scan in the address, or -1 if the string
556     * cannot match a valid IPv6 address.
557     *
558     * @param address the string to be scanned
559     * @param index the beginning index (inclusive)
560     * @param end the ending index (exclusive)
561     * @param counter a counter for the number of 16-bit sections read
562     * in the address
563     *
564     * @return the index of the next character to scan, or -1 if the
565     * string cannot match a valid IPv6 address
566     */
567     static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter);
568
569    /**
570     * Get the indicator as to whether this URI uses the "generic URI"
571     * syntax.
572     *
573     * @return true if this URI uses the "generic URI" syntax, false
574     *         otherwise
575     */
576     bool isGenericURI();
577
578    // -----------------------------------------------------------------------
579    //  Miscellaneous methods
580    // -----------------------------------------------------------------------
581
582    /**
583     * Initialize all fields of this URI from another URI.
584     *
585     * @param toCopy the URI to copy (cannot be null)
586     */
587     void initialize(const XMLUri& toCopy);
588
589    /**
590     * Initializes this URI from a base URI and a URI specification string.
591     * See RFC 2396 Section 4 and Appendix B for specifications on parsing
592     * the URI and Section 5 for specifications on resolving relative URIs
593     * and relative paths.
594     *
595     * @param baseURI the base URI (may be null if uriSpec is an absolute
596     *               URI)
597     *
598     * @param uriSpec the URI spec string which may be an absolute or
599     *                  relative URI (can only be null/empty if base
600     *                  is not null)
601     *
602     */
603     void initialize(const XMLUri* const baseURI
604                   , const XMLCh*  const uriSpec);
605
606    /**
607     * Initialize the scheme for this URI from a URI string spec.
608     *
609     * @param uriSpec the URI specification (cannot be null)
610     *
611     */
612     void initializeScheme(const XMLCh* const uriSpec);
613
614    /**
615     * Initialize the authority (userinfo, host and port) for this
616     * URI from a URI string spec.
617     *
618     * @param uriSpec the URI specification (cannot be null)
619     *
620     */
621     void initializeAuthority(const XMLCh* const uriSpec);
622
623    /**
624     * Initialize the path for this URI from a URI string spec.
625     *
626     * @param uriSpec the URI specification (cannot be null)
627     *
628     */
629     void initializePath(const XMLCh* const uriSpec);
630
631     /**
632      * cleanup the data variables
633      *
634      */
635     void cleanUp();
636
637    static bool isConformantSchemeName(const XMLCh* const scheme,
638                                       const int schemeLen);
639    static bool processScheme(const XMLCh* const uriStr, int& index);
640    static bool processAuthority(const XMLCh* const uriStr, const int authLen);
641    static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen);
642    static bool processPath(const XMLCh* const pathStr, const int pathStrLen,
643                            const bool isSchemePresent);
644
645    // -----------------------------------------------------------------------
646    //  Data members
647    //
648    //  for all the data member, we own it,
649    //  responsible for the creation and/or deletion for
650    //  the memory allocated.
651    //
652    // -----------------------------------------------------------------------
653    XMLCh*          fScheme;
654    XMLCh*          fUserInfo;
655    XMLCh*          fHost;
656    int             fPort;
657    XMLCh*          fRegAuth;
658    XMLCh*          fPath;
659    XMLCh*          fQueryString;
660    XMLCh*          fFragment;
661    XMLCh*          fURIText;
662    MemoryManager*  fMemoryManager;
663};
664
665// ---------------------------------------------------------------------------
666//  XMLUri: Getter methods
667// ---------------------------------------------------------------------------
668inline const XMLCh* XMLUri::getScheme() const
669{
670    return fScheme;
671}
672
673inline const XMLCh* XMLUri::getUserInfo() const
674{
675        return fUserInfo;
676}
677
678inline const XMLCh* XMLUri::getHost() const
679{
680        return fHost;
681}
682
683inline int XMLUri::getPort() const
684{
685        return fPort;
686}
687
688inline const XMLCh* XMLUri::getRegBasedAuthority() const
689{
690        return fRegAuth;
691}
692
693inline const XMLCh* XMLUri::getPath() const
694{
695        return fPath;
696}
697
698inline const XMLCh* XMLUri::getQueryString() const
699{
700        return fQueryString;
701}
702
703inline const XMLCh* XMLUri::getFragment() const
704{
705        return fFragment;
706}
707
708inline const XMLCh* XMLUri::getUriText() const
709{
710    //
711    //  Fault it in if not already. Since this is a const method and we
712    //  can't use mutable members due the compilers we have to support,
713    //  we have to cast off the constness.
714    //
715    if (!fURIText)
716        ((XMLUri*)this)->buildFullText();
717
718    return fURIText;
719}
720
721// ---------------------------------------------------------------------------
722//  XMLUri: Helper methods
723// ---------------------------------------------------------------------------
724inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar)
725{
726   return (XMLString::isAlphaNum(theChar) ||
727           XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1);
728}
729
730inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
731{
732    return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1);
733}
734
735inline bool XMLUri::isPathCharacter(const XMLCh theChar)
736{
737    return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1);
738}
739
740inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
741{
742    return (XMLString::isAlphaNum(theChar) ||
743            XMLString::indexOf(MARK_CHARACTERS, theChar) != -1);
744}
745
746XERCES_CPP_NAMESPACE_END
747
748#endif
Note: See TracBrowser for help on using the repository browser.