/* * Copyright 2001-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: XMLUri.hpp,v 1.18 2004/09/08 13:56:25 peiyongz Exp $ * $Log: XMLUri.hpp,v $ * Revision 1.18 2004/09/08 13:56:25 peiyongz * Apache License Version 2.0 * * Revision 1.17 2004/05/25 18:11:47 peiyongz * normalizeURI() added * * Revision 1.16 2004/01/12 22:01:02 cargilld * Minor performance change for handling reserved and unreserved characters. * * Revision 1.15 2003/12/17 00:18:35 cargilld * Update to memory management so that the static memory manager (one used to call Initialize) is only for static data. * * Revision 1.14 2003/12/11 22:21:25 neilg * fixes for the URI implementation to take registry names into account; much thanks to Michael Glavassevich * * Revision 1.13 2003/12/02 17:50:21 neilg * additional fix for bug 25118; once again, thanks to Jeroen Whitmond * * Revision 1.12 2003/10/01 00:20:41 knoaman * Add a static method to check whether a given string is a valid URI. * * Revision 1.11 2003/09/25 22:23:25 peiyongz * Implementation of Serialization/Deserialization * * Revision 1.10 2003/07/25 10:15:16 gareth * Patch by Michael Glavassevich * * The patch fixes Bugzilla #19787, #20006, #20009, #20010 and #20287, and * several other issues. A summary of the changes is listed below: * * 1. Added '[' and ']' to reserved characters as per RFC 2732. * 2. '[' and ']' added in RFC 2732, are not allowed in path segments, but * may appear in the opaque part. * 3. No URI can begin with a ':'. * 4. URI has no scheme if ':' occurs in a URI after '?' or '#', it's part of * the query string or fragment. * 5. Whitespace (even escaped as %20) is not permitted in the authority * portion of a URI. * 6. IPv4 addresses must match 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." * 1*3DIGIT. Since RFC 2732. * 7. IPv4 addresses are 32-bit, therefore no segment may be larger than 255. * This isn't expressed by the grammar. * 8. Hostnames cannot end with a '-'. * 9. Labels in a hostname must be 63 bytes or less [RFC 1034]. * 10. Hostnames may be no longer than 255 bytes [RFC 1034]. (That * restriction was already there. I just moved it inwards. * 11. Added support for IPv6 references added in RFC 2732. URIs such as * http://[::ffff:1.2.3.4] are valid. The BNF in RFC 2373 isn't correct. IPv6 * addresses are read according to section 2.2 of RFC 2373. * * Revision 1.9 2003/05/16 06:01:53 knoaman * Partial implementation of the configurable memory manager. * * Revision 1.8 2003/05/15 19:07:46 knoaman * Partial implementation of the configurable memory manager. * * Revision 1.7 2003/01/06 19:43:18 tng * New feature StandardUriConformant to force strict standard uri conformance. * * Revision 1.6 2002/11/21 15:42:39 gareth * Implemented copy constructor and operator =. Patch by Jennifer Schachter. * * Revision 1.5 2002/11/04 15:22:05 tng * C++ Namespace Support. * * Revision 1.4 2002/09/23 18:41:00 tng * DOM L3: Support baseURI. Add fURIText to XMLUri. Added by Gareth Reakes and Thomas Ford. * * Revision 1.3 2002/08/23 20:45:24 tng * .Memory leak fix: XMLUri data not deleted if constructor failed. * * Revision 1.2 2002/02/20 18:17:02 tng * [Bug 5977] Warnings on generating apiDocs. * * Revision 1.1.1.1 2002/02/01 22:22:17 peiyongz * sane_include * * Revision 1.3 2001/08/29 19:03:03 peiyongz * Bugzilla# 2816:on AIX 4.2, xlC 3 r ev.1, Compilation error on inline method * * Revision 1.2 2001/08/16 14:09:44 peiyongz * Removed unused ctors and methods * * Revision 1.1 2001/08/10 16:23:41 peiyongz * XMLUri: creation * * */ #if !defined(XMLURI_HPP) #define XMLURI_HPP #include #include #include #include XERCES_CPP_NAMESPACE_BEGIN /* * This class is a direct port of Java's URI class, to distinguish * itself from the XMLURL, we use the name XMLUri instead of * XMLURI. * * TODO: how to relate XMLUri and XMLURL since URL is part of URI. * */ class XMLUTIL_EXPORT XMLUri : public XSerializable, public XMemory { public: // ----------------------------------------------------------------------- // Constructors and Destructor // ----------------------------------------------------------------------- /** * Construct a new URI from a URI specification string. * * If the specification follows the "generic URI" syntax, (two slashes * following the first colon), the specification will be parsed * accordingly - setting the * scheme, * userinfo, * host, * port, * path, * querystring and * fragment * fields as necessary. * * If the specification does not follow the "generic URI" syntax, * the specification is parsed into a * scheme and * scheme-specific part (stored as the path) only. * * @param uriSpec the URI specification string (cannot be null or empty) * * @param manager Pointer to the memory manager to be used to * allocate objects. * * ctor# 2 * */ XMLUri(const XMLCh* const uriSpec, MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); /** * Construct a new URI from a base URI and a URI specification string. * The URI specification string may be a relative URI. * * @param baseURI the base URI (cannot be null if uriSpec is null or * empty) * * @param uriSpec the URI specification string (cannot be null or * empty if base is null) * * @param manager Pointer to the memory manager to be used to * allocate objects. * * ctor# 7 relative ctor * */ XMLUri(const XMLUri* const baseURI , const XMLCh* const uriSpec , MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); /** * Copy constructor */ XMLUri(const XMLUri& toCopy); XMLUri& operator=(const XMLUri& toAssign); virtual ~XMLUri(); // ----------------------------------------------------------------------- // Getter methods // ----------------------------------------------------------------------- /** * Get the URI as a string specification. See RFC 2396 Section 5.2. * * @return the URI string specification */ const XMLCh* getUriText() const; /** * Get the scheme for this URI. * * @return the scheme for this URI */ const XMLCh* getScheme() const; /** * Get the userinfo for this URI. * * @return the userinfo for this URI (null if not specified). */ const XMLCh* getUserInfo() const; /** * Get the host for this URI. * * @return the host for this URI (null if not specified). */ const XMLCh* getHost() const; /** * Get the port for this URI. * * @return the port for this URI (-1 if not specified). */ int getPort() const; /** * Get the registry based authority for this URI. * * @return the registry based authority (null if not specified). */ const XMLCh* getRegBasedAuthority() const; /** * Get the path for this URI. Note that the value returned is the path * only and does not include the query string or fragment. * * @return the path for this URI. */ const XMLCh* getPath() const; /** * Get the query string for this URI. * * @return the query string for this URI. Null is returned if there * was no "?" in the URI spec, empty string if there was a * "?" but no query string following it. */ const XMLCh* getQueryString() const; /** * Get the fragment for this URI. * * @return the fragment for this URI. Null is returned if there * was no "#" in the URI spec, empty string if there was a * "#" but no fragment following it. */ const XMLCh* getFragment() const; // ----------------------------------------------------------------------- // Setter methods // ----------------------------------------------------------------------- /** * Set the scheme for this URI. The scheme is converted to lowercase * before it is set. * * @param newScheme the scheme for this URI (cannot be null) * */ void setScheme(const XMLCh* const newScheme); /** * Set the userinfo for this URI. If a non-null value is passed in and * the host value is null, then an exception is thrown. * * @param newUserInfo the userinfo for this URI * */ void setUserInfo(const XMLCh* const newUserInfo); /** * Set the host for this URI. If null is passed in, the userinfo * field is also set to null and the port is set to -1. * * Note: This method overwrites registry based authority if it * previously existed in this URI. * * @param newHost the host for this URI * */ void setHost(const XMLCh* const newHost); /** * Set the port for this URI. -1 is used to indicate that the port is * not specified, otherwise valid port numbers are between 0 and 65535. * If a valid port number is passed in and the host field is null, * an exception is thrown. * * @param newPort the port number for this URI * */ void setPort(int newPort); /** * Sets the registry based authority for this URI. * * Note: This method overwrites server based authority * if it previously existed in this URI. * * @param newRegAuth the registry based authority for this URI */ void setRegBasedAuthority(const XMLCh* const newRegAuth); /** * Set the path for this URI. * * If the supplied path is null, then the * query string and fragment are set to null as well. * * If the supplied path includes a query string and/or fragment, * these fields will be parsed and set as well. * * Note: * * For URIs following the "generic URI" syntax, the path * specified should start with a slash. * * For URIs that do not follow the generic URI syntax, this method * sets the scheme-specific part. * * @param newPath the path for this URI (may be null) * */ void setPath(const XMLCh* const newPath); /** * Set the query string for this URI. A non-null value is valid only * if this is an URI conforming to the generic URI syntax and * the path value is not null. * * @param newQueryString the query string for this URI * */ void setQueryString(const XMLCh* const newQueryString); /** * Set the fragment for this URI. A non-null value is valid only * if this is a URI conforming to the generic URI syntax and * the path value is not null. * * @param newFragment the fragment for this URI * */ void setFragment(const XMLCh* const newFragment); // ----------------------------------------------------------------------- // Miscellaneous methods // ----------------------------------------------------------------------- /** * Determine whether a given string contains only URI characters (also * called "uric" in RFC 2396). uric consist of all reserved * characters, unreserved characters and escaped characters. * * @return true if the string is comprised of uric, false otherwise */ static bool isURIString(const XMLCh* const uric); /** * Determine whether a given string is a valid URI */ static bool isValidURI( const XMLUri* const baseURI , const XMLCh* const uriStr); /** * Determine whether a given string is a valid URI */ static bool isValidURI( bool haveBaseURI , const XMLCh* const uriStr); static void normalizeURI(const XMLCh* const systemURI, XMLBuffer& normalizedURI); /*** * Support for Serialization/De-serialization ***/ DECL_XSERIALIZABLE(XMLUri) XMLUri(MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager); private: static const XMLCh MARK_OR_RESERVED_CHARACTERS[]; static const XMLCh RESERVED_CHARACTERS[]; static const XMLCh MARK_CHARACTERS[]; static const XMLCh SCHEME_CHARACTERS[]; static const XMLCh USERINFO_CHARACTERS[]; static const XMLCh REG_NAME_CHARACTERS[]; static const XMLCh PATH_CHARACTERS[]; //helper method for getUriText void buildFullText(); // ----------------------------------------------------------------------- // Private helper methods // ----------------------------------------------------------------------- /** * Determine whether a character is a reserved character: * * @return true if the string contains any reserved characters */ static bool isReservedCharacter(const XMLCh theChar); /** * Determine whether a character is a path character: * * @return true if the character is path character */ static bool isPathCharacter(const XMLCh theChar); /** * Determine whether a char is an unreserved character. * * @return true if the char is unreserved, false otherwise */ static bool isUnreservedCharacter(const XMLCh theChar); /** * Determine whether a char is an reserved or unreserved character. * * @return true if the char is reserved or unreserved, false otherwise */ static bool isReservedOrUnreservedCharacter(const XMLCh theChar); /** * Determine whether a scheme conforms to the rules for a scheme name. * A scheme is conformant if it starts with an alphanumeric, and * contains only alphanumerics, '+','-' and '.'. * * @return true if the scheme is conformant, false otherwise */ static bool isConformantSchemeName(const XMLCh* const scheme); /** * Determine whether a userInfo conforms to the rules for a userinfo. * * @return true if the scheme is conformant, false otherwise */ static void isConformantUserInfo(const XMLCh* const userInfo , MemoryManager* const manager); /** * Determines whether the components host, port, and user info * are valid as a server authority. * * @return true if the given host, port, and userinfo compose * a valid server authority */ static bool isValidServerBasedAuthority(const XMLCh* const host , const int hostLen , const int port , const XMLCh* const userinfo , const int userLen); /** * Determines whether the components host, port, and user info * are valid as a server authority. * * @return true if the given host, port, and userinfo compose * a valid server authority */ static bool isValidServerBasedAuthority(const XMLCh* const host , const int port , const XMLCh* const userinfo , MemoryManager* const manager); /** * Determines whether the given string is a registry based authority. * * @param authority the authority component of a URI * * @return true if the given string is a registry based authority */ static bool isValidRegistryBasedAuthority(const XMLCh* const authority , const int authLen); /** * Determines whether the given string is a registry based authority. * * @param authority the authority component of a URI * * @return true if the given string is a registry based authority */ static bool isValidRegistryBasedAuthority(const XMLCh* const authority); /** * Determine whether a string is syntactically capable of representing * a valid IPv4 address, IPv6 reference or the domain name of a network host. * * A valid IPv4 address consists of four decimal digit groups * separated by a '.'. * * See RFC 2732 Section 3, and RFC 2373 Section 2.2, for the * definition of IPv6 references. * * A hostname consists of domain labels (each of which must begin and * end with an alphanumeric but may contain '-') separated by a '.'. * See RFC 2396 Section 3.2.2. * * @return true if the string is a syntactically valid IPv4 address * or hostname */ static bool isWellFormedAddress(const XMLCh* const addr , MemoryManager* const manager); /** * Determines whether a string is an IPv4 address as defined by * RFC 2373, and under the further constraint that it must be a 32-bit * address. Though not expressed in the grammar, in order to satisfy * the 32-bit address constraint, each segment of the address cannot * be greater than 255 (8 bits of information). * * @return true if the string is a syntactically valid IPv4 address */ static bool isWellFormedIPv4Address(const XMLCh* const addr, const int length); /** * Determines whether a string is an IPv6 reference as defined * by RFC 2732, where IPv6address is defined in RFC 2373. The * IPv6 address is parsed according to Section 2.2 of RFC 2373, * with the additional constraint that the address be composed of * 128 bits of information. * * Note: The BNF expressed in RFC 2373 Appendix B does not * accurately describe section 2.2, and was in fact removed from * RFC 3513, the successor of RFC 2373. * * @return true if the string is a syntactically valid IPv6 reference */ static bool isWellFormedIPv6Reference(const XMLCh* const addr, const int length); /** * Helper function for isWellFormedIPv6Reference which scans the * hex sequences of an IPv6 address. It returns the index of the * next character to scan in the address, or -1 if the string * cannot match a valid IPv6 address. * * @param address the string to be scanned * @param index the beginning index (inclusive) * @param end the ending index (exclusive) * @param counter a counter for the number of 16-bit sections read * in the address * * @return the index of the next character to scan, or -1 if the * string cannot match a valid IPv6 address */ static int scanHexSequence (const XMLCh* const addr, int index, int end, int& counter); /** * Get the indicator as to whether this URI uses the "generic URI" * syntax. * * @return true if this URI uses the "generic URI" syntax, false * otherwise */ bool isGenericURI(); // ----------------------------------------------------------------------- // Miscellaneous methods // ----------------------------------------------------------------------- /** * Initialize all fields of this URI from another URI. * * @param toCopy the URI to copy (cannot be null) */ void initialize(const XMLUri& toCopy); /** * Initializes this URI from a base URI and a URI specification string. * See RFC 2396 Section 4 and Appendix B for specifications on parsing * the URI and Section 5 for specifications on resolving relative URIs * and relative paths. * * @param baseURI the base URI (may be null if uriSpec is an absolute * URI) * * @param uriSpec the URI spec string which may be an absolute or * relative URI (can only be null/empty if base * is not null) * */ void initialize(const XMLUri* const baseURI , const XMLCh* const uriSpec); /** * Initialize the scheme for this URI from a URI string spec. * * @param uriSpec the URI specification (cannot be null) * */ void initializeScheme(const XMLCh* const uriSpec); /** * Initialize the authority (userinfo, host and port) for this * URI from a URI string spec. * * @param uriSpec the URI specification (cannot be null) * */ void initializeAuthority(const XMLCh* const uriSpec); /** * Initialize the path for this URI from a URI string spec. * * @param uriSpec the URI specification (cannot be null) * */ void initializePath(const XMLCh* const uriSpec); /** * cleanup the data variables * */ void cleanUp(); static bool isConformantSchemeName(const XMLCh* const scheme, const int schemeLen); static bool processScheme(const XMLCh* const uriStr, int& index); static bool processAuthority(const XMLCh* const uriStr, const int authLen); static bool isWellFormedAddress(const XMLCh* const addr, const int addrLen); static bool processPath(const XMLCh* const pathStr, const int pathStrLen, const bool isSchemePresent); // ----------------------------------------------------------------------- // Data members // // for all the data member, we own it, // responsible for the creation and/or deletion for // the memory allocated. // // ----------------------------------------------------------------------- XMLCh* fScheme; XMLCh* fUserInfo; XMLCh* fHost; int fPort; XMLCh* fRegAuth; XMLCh* fPath; XMLCh* fQueryString; XMLCh* fFragment; XMLCh* fURIText; MemoryManager* fMemoryManager; }; // --------------------------------------------------------------------------- // XMLUri: Getter methods // --------------------------------------------------------------------------- inline const XMLCh* XMLUri::getScheme() const { return fScheme; } inline const XMLCh* XMLUri::getUserInfo() const { return fUserInfo; } inline const XMLCh* XMLUri::getHost() const { return fHost; } inline int XMLUri::getPort() const { return fPort; } inline const XMLCh* XMLUri::getRegBasedAuthority() const { return fRegAuth; } inline const XMLCh* XMLUri::getPath() const { return fPath; } inline const XMLCh* XMLUri::getQueryString() const { return fQueryString; } inline const XMLCh* XMLUri::getFragment() const { return fFragment; } inline const XMLCh* XMLUri::getUriText() const { // // Fault it in if not already. Since this is a const method and we // can't use mutable members due the compilers we have to support, // we have to cast off the constness. // if (!fURIText) ((XMLUri*)this)->buildFullText(); return fURIText; } // --------------------------------------------------------------------------- // XMLUri: Helper methods // --------------------------------------------------------------------------- inline bool XMLUri::isReservedOrUnreservedCharacter(const XMLCh theChar) { return (XMLString::isAlphaNum(theChar) || XMLString::indexOf(MARK_OR_RESERVED_CHARACTERS, theChar) != -1); } inline bool XMLUri::isReservedCharacter(const XMLCh theChar) { return (XMLString::indexOf(RESERVED_CHARACTERS, theChar) != -1); } inline bool XMLUri::isPathCharacter(const XMLCh theChar) { return (XMLString::indexOf(PATH_CHARACTERS, theChar) != -1); } inline bool XMLUri::isUnreservedCharacter(const XMLCh theChar) { return (XMLString::isAlphaNum(theChar) || XMLString::indexOf(MARK_CHARACTERS, theChar) != -1); } XERCES_CPP_NAMESPACE_END #endif