source: NonGTP/Xerces/xerces-c_2_8_0/include/xercesc/util/XMLUri.hpp @ 2674

Revision 2674, 21.0 KB checked in by mattausch, 16 years ago (diff)
Line 
1/*
2 * Licensed to the Apache Software Foundation (ASF) under one or more
3 * contributor license agreements.  See the NOTICE file distributed with
4 * this work for additional information regarding copyright ownership.
5 * The ASF licenses this file to You under the Apache License, Version 2.0
6 * (the "License"); you may not use this file except in compliance with
7 * the License.  You may obtain a copy of the License at
8 *
9 *      http://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * $Id: XMLUri.hpp 568078 2007-08-21 11:43:25Z amassari $
20 */
21
22#if !defined(XMLURI_HPP)
23#define XMLURI_HPP
24
25#include <xercesc/util/XMemory.hpp>
26#include <xercesc/util/XMLString.hpp>
27
28#include <xercesc/internal/XSerializable.hpp>
29#include <xercesc/framework/XMLBuffer.hpp>
30
31XERCES_CPP_NAMESPACE_BEGIN
32
33/*
34 * This class is a direct port of Java's URI class. To distinguish
35 * it from XMLURL, we use the name XMLUri instead of
36 * XMLURI.
37 *
38 * TODO: how to relate XMLUri and XMLURL since URL is part of URI.
39 *
40 */
41
42class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory
43{
44public:
45
46    // -----------------------------------------------------------------------
47    //  Constructors and Destructor
48    // -----------------------------------------------------------------------
49
50    /**
51     * Construct a new URI from a URI specification string.
52     *
53     * If the specification follows the "generic URI" syntax, (two slashes
54     * following the first colon), the specification will be parsed
55     * accordingly - setting the
56     *                           scheme,
57     *                           userinfo,
58     *                           host,
59     *                           port,
60     *                           path,
61     *                           querystring and
62     *                           fragment
63     * fields as necessary.
64     *
65     * If the specification does not follow the "generic URI" syntax,
66     * the specification is parsed into a
67     *                           scheme and
68     *                           scheme-specific part (stored as the path) only.
69     *
70     * @param uriSpec the URI specification string (cannot be null or empty)
71     *
72     * @param manager Pointer to the memory manager to be used to
73     *                allocate objects.
74     *
75     * ctor# 2
76     *
77     */
78    XMLUri(const XMLCh* const    uriSpec,
79           MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
80
81    /**
82     * Construct a new URI from a base URI and a URI specification string.
83     * The URI specification string may be a relative URI.
84     *
85     * @param baseURI the base URI (cannot be null if uriSpec is null or
86     *                empty)
87     *
88     * @param uriSpec the URI specification string (cannot be null or
89     *                empty if base is null)
90     *
91     * @param manager Pointer to the memory manager to be used to
92     *                allocate objects.
93     *
94     * ctor# 7 relative ctor
95     *
96     */
97    XMLUri(const XMLUri* const  baseURI
98         , const XMLCh* const   uriSpec
99         , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
100
101    /**
102     * Copy constructor and copy-assignment operator
103     */
104    XMLUri(const XMLUri& toCopy);
105    XMLUri& operator=(const XMLUri& toAssign);
106
107    virtual ~XMLUri();
108
109    // -----------------------------------------------------------------------
110    //  Getter methods
111    // -----------------------------------------------------------------------
112    /**
113     * Get the URI as a string specification. See RFC 2396 Section 5.2.
114     *
115     * @return the URI string specification
116     */
117    const XMLCh* getUriText() const;
118
119    /**
120     * Get the scheme for this URI.
121     *
122     * @return the scheme for this URI
123     */
124     const XMLCh* getScheme() const;
125
126    /**
127     * Get the userinfo for this URI.
128     *
129     * @return the userinfo for this URI (null if not specified).
130     */
131     const XMLCh* getUserInfo() const;
132
133
134    /**
135     * Get the host for this URI.
136     *
137     * @return the host for this URI (null if not specified).
138     */
139     const XMLCh* getHost() const;
140
141    /**
142     * Get the port for this URI.
143     *
144     * @return the port for this URI (-1 if not specified).
145     */
146     int getPort() const;
147     
148    /**
149     * Get the registry based authority for this URI.
150     *
151     * @return the registry based authority (null if not specified).
152     */
153     const XMLCh* getRegBasedAuthority() const;
154
155    /**
156     * Get the path for this URI. Note that the value returned is the path
157     * only and does not include the query string or fragment.
158     *
159     * @return the path for this URI.
160     */
161     const XMLCh* getPath() const;
162
163    /**
164     * Get the query string for this URI.
165     *
166     * @return the query string for this URI. Null is returned if there
167     *         was no "?" in the URI spec, empty string if there was a
168     *         "?" but no query string following it.
169     */
170     const XMLCh* getQueryString() const;
171
172    /**
173     * Get the fragment for this URI.
174     *
175     * @return the fragment for this URI. Null is returned if there
176     *         was no "#" in the URI spec, empty string if there was a
177     *         "#" but no fragment following it.
178     */
179     const XMLCh* getFragment() const;
180
181    // -----------------------------------------------------------------------
182    //  Setter methods
183    // -----------------------------------------------------------------------
184
185    /**
186     * Set the scheme for this URI. The scheme is converted to lowercase
187     * before it is set.
188     *
189     * @param newScheme the scheme for this URI (cannot be null)
190     *
191     */
192     void setScheme(const XMLCh* const newScheme);
193
194    /**
195     * Set the userinfo for this URI. If a non-null value is passed in and
196     * the host value is null, then an exception is thrown.
197     *
198     * @param newUserInfo the userinfo for this URI
199     *
200     */
201     void setUserInfo(const XMLCh* const newUserInfo);
202
203    /**
204     * Set the host for this URI. If null is passed in, the userinfo
205     * field is also set to null and the port is set to -1.
206     *
207     * Note: This method overwrites registry based authority if it
208     * previously existed in this URI.
209     *
210     * @param newHost the host for this URI
211     *
212     */
213     void setHost(const XMLCh* const newHost);
214
215    /**
216     * Set the port for this URI. -1 is used to indicate that the port is
217     * not specified, otherwise valid port numbers are between 0 and 65535.
218     * If a valid port number is passed in and the host field is null,
219     * an exception is thrown.
220     *
221     * @param newPort the port number for this URI
222     *
223     */
224     void setPort(int newPort);
225     
226    /**
227     * Sets the registry based authority for this URI.
228     *
229     * Note: This method overwrites server based authority
230     * if it previously existed in this URI.
231     *
232     * @param newRegAuth the registry based authority for this URI
233     */
234     void setRegBasedAuthority(const XMLCh* const newRegAuth);
235
236    /**
237     * Set the path for this URI.
238     *
239     * If the supplied path is null, then the
240     * query string and fragment are set to null as well.
241     *
242     * If the supplied path includes a query string and/or fragment,
243     * these fields will be parsed and set as well.
244     *
245     * Note:
246     *
247     * For URIs following the "generic URI" syntax, the path
248     * specified should start with a slash.
249     *
250     * For URIs that do not follow the generic URI syntax, this method
251     * sets the scheme-specific part.
252     *
253     * @param newPath the path for this URI (may be null)
254     *
255     */
256     void setPath(const XMLCh* const newPath);
257
258    /**
259     * Set the query string for this URI. A non-null value is valid only
260     * if this is a URI conforming to the generic URI syntax and
261     * the path value is not null.
262     *
263     * @param newQueryString the query string for this URI
264     *
265     */
266     void setQueryString(const XMLCh* const newQueryString);
267
268    /**
269     * Set the fragment for this URI. A non-null value is valid only
270     * if this is a URI conforming to the generic URI syntax and
271     * the path value is not null.
272     *
273     * @param newFragment the fragment for this URI
274     *
275     */
276     void setFragment(const XMLCh* const newFragment);
277
278    // -----------------------------------------------------------------------
279    //  Miscellaneous methods
280    // -----------------------------------------------------------------------
281
282    /**
283     * Determine whether a given string contains only URI characters (also
284     * called "uric" in RFC 2396). uric consist of all reserved
285     * characters, unreserved characters and escaped characters.
286     *
287     * @return true if the string is comprised of uric, false otherwise
288     */
289    static bool isURIString(const XMLCh* const uric);
290
291    /**
292     * Determine whether a given string is a valid URI
293     */
294    static bool isValidURI( const XMLUri* const baseURI
295                          , const XMLCh* const uriStr);
296    /**
297     * Determine whether a given string is a valid URI
298     */
299    static bool isValidURI( bool haveBaseURI
300                          , const XMLCh* const uriStr);
301
302
303    static void normalizeURI(const XMLCh*     const systemURI,
304                                   XMLBuffer&       normalizedURI);
305
306    /***
307     * Support for Serialization/De-serialization
308     ***/
309    DECL_XSERIALIZABLE(XMLUri)
310
311    XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
312
313private:
314
315    static const XMLCh MARK_OR_RESERVED_CHARACTERS[];
316    static const XMLCh RESERVED_CHARACTERS[];
317    static const XMLCh MARK_CHARACTERS[];
318    static const XMLCh SCHEME_CHARACTERS[];
319    static const XMLCh USERINFO_CHARACTERS[];
320    static const XMLCh REG_NAME_CHARACTERS[];
321    static const XMLCh PATH_CHARACTERS[];
322
323    // Helper for getUriText(): called when fURIText has not been built yet.
324    void buildFullText();
325
326    // -----------------------------------------------------------------------
327    //  Private helper methods
328    // -----------------------------------------------------------------------
329
330    /**
331     * Determine whether a character is a reserved character.
332     *
333     * @return true if the character is a reserved character
334     */
335    static bool isReservedCharacter(const XMLCh theChar);
336   
337    /**
338     * Determine whether a character is a path character.
339     *
340     * @return true if the character is a path character
341     */
342    static bool isPathCharacter(const XMLCh theChar);
343
344    /**
345     * Determine whether a char is an unreserved character.
346     *
347     * @return true if the char is unreserved, false otherwise
348     */
349    static bool isUnreservedCharacter(const XMLCh theChar);
350
351    /**
352     * Determine whether a char is a reserved or unreserved character.
353     *
354     * @return true if the char is reserved or unreserved, false otherwise
355     */               
356    static bool isReservedOrUnreservedCharacter(const XMLCh theChar);
357
358    /**
359     * Determine whether a scheme conforms to the rules for a scheme name.
360     * A scheme is conformant if it starts with an alphanumeric, and
361     * contains only alphanumerics, '+','-' and '.'.
362     *
363     * @return true if the scheme is conformant, false otherwise
364     */
365    static bool isConformantSchemeName(const XMLCh* const scheme);
366
367    /**
368     * Check whether a userInfo conforms to the rules for a userinfo.
369     * Declared void, so no result is returned. NOTE(review): presumably
370     * non-conformance is reported by throwing - confirm in XMLUri.cpp.
371     */
372    static void isConformantUserInfo(const XMLCh* const userInfo
373        , MemoryManager* const manager);
374   
375    /**
376     * Determines whether the components host, port, and user info
377     * are valid as a server authority.
378     *
379     * @return true if the given host, port, and userinfo compose
380     * a valid server authority
381     */
382    static bool isValidServerBasedAuthority(const XMLCh* const host
383                                           , const int hostLen
384                                           , const int port
385                                           , const XMLCh* const userinfo
386                                           , const int userLen);
387                                           
388    /**
389     * Determines whether the components host, port, and user info
390     * are valid as a server authority.
391     *
392     * @return true if the given host, port, and userinfo compose
393     * a valid server authority
394     */
395    static bool isValidServerBasedAuthority(const XMLCh* const host
396                                           , const int port
397                                           , const XMLCh* const userinfo
398                                           , MemoryManager* const manager);
399     
400   /**
401    * Determines whether the given string is a registry based authority.
402    *
403    * @param authority the authority component of a URI
404    *
405    * @return true if the given string is a registry based authority
406    */
407    static bool isValidRegistryBasedAuthority(const XMLCh* const authority
408                                             , const int authLen);
409
410   /**
411    * Determines whether the given string is a registry based authority.
412    *
413    * @param authority the authority component of a URI
414    *
415    * @return true if the given string is a registry based authority
416    */
417    static bool isValidRegistryBasedAuthority(const XMLCh* const authority);
418
419    /**
420     * Determine whether a string is syntactically capable of representing
421     * a valid IPv4 address, IPv6 reference or the domain name of a network host.
422     *
423     * A valid IPv4 address consists of four decimal digit groups
424     * separated by a '.'.
425     *
426     * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the
427     * definition of IPv6 references.
428     *
429     * A hostname consists of domain labels (each of which must begin and
430     * end with an alphanumeric but may contain '-') separated by a '.'.
431     * See RFC 2396 Section 3.2.2.
432     *
433     * @return true if the string is a syntactically valid IPv4 address,
434     *              IPv6 reference or hostname
435     */
436     static bool isWellFormedAddress(const XMLCh* const addr
437         , MemoryManager* const manager);
438     
439    /**
440     * Determines whether a string is an IPv4 address as defined by
441     * RFC 2373, and under the further constraint that it must be a 32-bit
442     * address. Though not expressed in the grammar, in order to satisfy
443     * the 32-bit address constraint, each segment of the address cannot
444     * be greater than 255 (8 bits of information).
445     *
446     * @return true if the string is a syntactically valid IPv4 address
447     */
448     static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length);
449     
450    /**
451     * Determines whether a string is an IPv6 reference as defined
452     * by RFC 2732, where IPv6address is defined in RFC 2373. The
453     * IPv6 address is parsed according to Section 2.2 of RFC 2373,
454     * with the additional constraint that the address be composed of
455     * 128 bits of information.
456     *
457     * Note: The BNF expressed in RFC 2373 Appendix B does not
458     * accurately describe section 2.2, and was in fact removed from
459     * RFC 3513, the successor of RFC 2373.
460     *
461     * @return true if the string is a syntactically valid IPv6 reference
462     */
463     static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length);
464     
465    /**
466     * Helper function for isWellFormedIPv6Reference which scans the
467     * hex sequences of an IPv6 address. It returns the index of the
468     * next character to scan in the address, or -1 if the string
469     * cannot match a valid IPv6 address.
470     *
471     * @param addr the string to be scanned
472     * @param index the beginning index (inclusive)
473     * @param end the ending index (exclusive)
474     * @param counter a counter for the number of 16-bit sections read
475     * in the address (passed by reference)
476     *
477     * @return the index of the next character to scan, or -1 if the
478     * string cannot match a valid IPv6 address
479     */
480     static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter);
481
482    /**
483     * Get the indicator as to whether this URI uses the "generic URI"
484     * syntax.
485     *
486     * @return true if this URI uses the "generic URI" syntax, false
487     *         otherwise
488     */
489     bool isGenericURI();
490
491    // -----------------------------------------------------------------------
492    //  Miscellaneous methods
493    // -----------------------------------------------------------------------
494
495    /**
496     * Initialize all fields of this URI from another URI.
497     *
498     * @param toCopy the URI to copy (cannot be null)
499     */
500     void initialize(const XMLUri& toCopy);
501
502    /**
503     * Initializes this URI from a base URI and a URI specification string.
504     * See RFC 2396 Section 4 and Appendix B for specifications on parsing
505     * the URI and Section 5 for specifications on resolving relative URIs
506     * and relative paths.
507     *
508     * @param baseURI the base URI (may be null if uriSpec is an absolute
509     *               URI)
510     *
511     * @param uriSpec the URI spec string which may be an absolute or
512     *                  relative URI (can only be null/empty if base
513     *                  is not null)
514     *
515     */
516     void initialize(const XMLUri* const baseURI
517                   , const XMLCh*  const uriSpec);
518
519    /**
520     * Initialize the scheme for this URI from a URI string spec.
521     *
522     * @param uriSpec the URI specification (cannot be null)
523     *
524     */
525     void initializeScheme(const XMLCh* const uriSpec);
526
527    /**
528     * Initialize the authority (userinfo, host and port) for this
529     * URI from a URI string spec.
530     *
531     * @param uriSpec the URI specification (cannot be null)
532     *
533     */
534     void initializeAuthority(const XMLCh* const uriSpec);
535
536    /**
537     * Initialize the path for this URI from a URI string spec.
538     *
539     * @param uriSpec the URI specification (cannot be null)
540     *
541     */
542     void initializePath(const XMLCh* const uriSpec);
543
544     /**
545      * cleanup the data variables
546      *
547      */
548     void cleanUp();
549
550    static bool isConformantSchemeName(const XMLCh* const scheme,
551                                       const int schemeLen);
552    static bool processScheme(const XMLCh* const uriStr, int& index);
553    static bool processAuthority(const XMLCh* const uriStr, const int authLen);
554    static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen);
555    static bool processPath(const XMLCh* const pathStr, const int pathStrLen,
556                            const bool isSchemePresent);
557
558    // -----------------------------------------------------------------------
559    //  Data members
560    //
561    //  We own all of the data members and are
562    //  responsible for the creation and/or deletion of
563    //  the memory allocated for them.
564    //
565    // -----------------------------------------------------------------------
566    int             fPort;
567    XMLCh*          fScheme;
568    XMLCh*          fUserInfo;
569    XMLCh*          fHost;
570    XMLCh*          fRegAuth;
571    XMLCh*          fPath;
572    XMLCh*          fQueryString;
573    XMLCh*          fFragment;
574    XMLCh*          fURIText;
575    MemoryManager*  fMemoryManager;
576};
577
578// ---------------------------------------------------------------------------
579//  XMLUri: Getter methods
580// ---------------------------------------------------------------------------
// Returns the fScheme member as stored; the pointer is not copied, so it
// refers to storage held by this XMLUri object.
581inline const XMLCh* XMLUri::getScheme() const
582{
583    return fScheme;
584}
585
// Returns the fUserInfo member as stored (no copy is made). The class
// declaration documents the value as null when no userinfo was specified.
586inline const XMLCh* XMLUri::getUserInfo() const
587{
588        return fUserInfo;
589}
590
// Returns the fHost member as stored (no copy is made). The class
// declaration documents the value as null when no host was specified.
591inline const XMLCh* XMLUri::getHost() const
592{
593        return fHost;
594}
595
// Returns the fPort member. The class declaration documents -1 as the
// "port not specified" value.
596inline int XMLUri::getPort() const
597{
598        return fPort;
599}
600
// Returns the fRegAuth member as stored (no copy is made). The class
// declaration documents the value as null when no registry based
// authority was specified.
601inline const XMLCh* XMLUri::getRegBasedAuthority() const
602{
603        return fRegAuth;
604}
605
// Returns the fPath member as stored (no copy is made). Query string and
// fragment are held in separate members and are not part of this value.
606inline const XMLCh* XMLUri::getPath() const
607{
608        return fPath;
609}
610
// Returns the fQueryString member as stored (no copy is made). The class
// declaration documents null as "no '?' in the URI spec" and an empty
// string as "'?' present but nothing after it".
611inline const XMLCh* XMLUri::getQueryString() const
612{
613        return fQueryString;
614}
615
// Returns the fFragment member as stored (no copy is made). The class
// declaration documents null as "no '#' in the URI spec" and an empty
// string as "'#' present but nothing after it".
616inline const XMLCh* XMLUri::getFragment() const
617{
618        return fFragment;
619}
620
// Returns fURIText, calling buildFullText() first when fURIText is still
// null. Although declared const, the first call can therefore modify the
// object through the cast below.
621inline const XMLCh* XMLUri::getUriText() const
622{
623    //
624    //  Fault it in if not already. Since this is a const method and we
625    //  can't use mutable members due to the compilers we have to support,
626    //  we have to cast off the constness.
627    //
628    if (!fURIText)
629        ((XMLUri*)this)->buildFullText();
630
631    return fURIText;
632}
633
634// ---------------------------------------------------------------------------
635//  XMLUri: Helper methods
636// ---------------------------------------------------------------------------
// True when theChar passes XMLString::isAlphaNum or is found in the
// MARK_OR_RESERVED_CHARACTERS table (an indexOf result other than -1 is
// treated as "found").
637inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar)
638{
639   return (XMLString::isAlphaNum(theChar) ||
640           XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1);
641}
642
// True when theChar is found in the RESERVED_CHARACTERS table (an indexOf
// result other than -1 is treated as "found").
643inline bool XMLUri::isReservedCharacter(const XMLCh theChar)
644{
645    return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1);
646}
647
// True when theChar is found in the PATH_CHARACTERS table. No isAlphaNum
// test is made here, so whether alphanumerics qualify depends on the
// table's contents, which are defined outside this header.
648inline bool XMLUri::isPathCharacter(const XMLCh theChar)
649{
650    return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1);
651}
652
// True when theChar passes XMLString::isAlphaNum or is found in the
// MARK_CHARACTERS table (an indexOf result other than -1 is treated as
// "found").
653inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar)
654{
655    return (XMLString::isAlphaNum(theChar) ||
656            XMLString::indexOf(MARK_CHARACTERS, theChar) != -1);
657}
658
659XERCES_CPP_NAMESPACE_END
660
661#endif
Note: See TracBrowser for help on using the repository browser.