00001 #ifndef s11n_io_URL_HPP_INCLUDED 00002 #define s11n_io_URL_HPP_INCLUDED 1 00003 00004 #include <string> 00005 #include <s11n.net/s11n/export.hpp> 00006 #include <s11n.net/s11n/refcount.hpp> 00007 #include <s11n.net/s11n/factory.hpp> 00008 00009 // Reminder: RFC1738: http://www.ietf.org/rfc/rfc1738.txt 00010 00011 namespace s11n { namespace io { 00012 00013 /** 00014 url_parser is a basic implementation for 00015 parsing a URL string into its atomic components. 00016 It is not a full-featured parser, for example it does 00017 not parse key=value arguments at the end of a URL. 00018 00019 This type uses reference-counted internal data and 00020 copy-on-write, so copying it is cheap. 00021 */ 00022 class S11N_EXPORT_API url_parser 00023 { 00024 public: 00025 /** 00026 Parses the given URL. good() reveals the status 00027 of the parse. 00028 */ 00029 url_parser( std::string const & ); 00030 /** 00031 Creates an empty (!good()) parser. 00032 */ 00033 url_parser(); 00034 // Rely on default copy/assign ops: 00035 // url_parser & url_parser( url_parser const & ); 00036 // url_parser & operator=( url_parser const & ); 00037 /** 00038 Functions the same as the string-argument ctor. 00039 */ 00040 url_parser & operator=( std::string const & ); 00041 ~url_parser(); 00042 /** 00043 Returns true if the last parse() got a "valid" URL. 00044 */ 00045 bool good() const; 00046 00047 /** 00048 Parses URLs of the following forms: 00049 00050 scheme://[user[:password]@]host[:[port[:]]][/path/to/resource] 00051 00052 Note that host may have an optional ':' after it 00053 without a port number, and that a port number may be followed 00054 by an optional ':' character. This is to accommodate ssh 00055 URLs and the like: 00056 00057 ssh://user\@host:/path 00058 00059 ssh://user\@host:33:/path 00060 00061 This function returns the same as good(). 00062 00063 If this function returns false then the contents of this 00064 objects are in an undefined state. They should not be used 00065 before a call to parse() succeeds. 00066 */ 00067 bool parse( std::string const & ); 00068 00069 /** Returns the URL most recently passed to parse(). */ 00070 std::string url() const; 00071 /** Returns the scheme part of url(). */ 00072 std::string scheme() const; 00073 /** Returns the user name part of url(), which may be empty. */ 00074 std::string user() const; 00075 /** Returns the user password part of url(), which may be empty. */ 00076 std::string password() const; 00077 /** Returns the host part of url(). */ 00078 std::string host() const; 00079 /** 00080 Returns the resource path part of url(), which may be empty. 00081 00082 Contrary to RFC1738, a leading slash in a URL *is* considered 00083 to be part of the path. 00084 00085 In some protocols (e.g. http) an empty path can be 00086 considered the same as '/', but on others 00087 (e.g. file) such interpretation is not appropriate. 00088 */ 00089 std::string path() const; 00090 00091 /** 00092 If the URL path has a '?' in it, anything after the '?' 00093 is assumed to be a list of arguments, e.g. as those passed 00094 to HTTP GET requests. This string does not contain the leading 00095 '?'. 00096 */ 00097 std::string args_str() const; 00098 00099 typedef std::map<std::string,std::string> args_map_type; 00100 args_map_type const & args_map() const; 00101 00102 /** Returns the port number part of url(), or 0 if no port was specified. */ 00103 unsigned short port() const; 00104 private: 00105 /** 00106 impl holds the private data for a url_parser. 00107 00108 PS: i hate that this has to be in the public 00109 header, but rcptr<impl> needs impl to be a complete 00110 type. 00111 00112 TODO: get rid of the rcptr<> usage and hide the pimpl 00113 in the implementation. Also consider storing all the 00114 string data in a single (char*), using NULLs to delimit 00115 it. That'd save a lot of space. 00116 */ 00117 struct impl 00118 { 00119 std::string url; 00120 std::string proto; 00121 std::string user; 00122 std::string pass; 00123 std::string host; 00124 unsigned short port; 00125 std::string path; 00126 std::string args_str; 00127 bool good; 00128 args_map_type args_map; 00129 impl(); 00130 }; 00131 s11n::refcount::rcptr<impl> pimpl; 00132 }; 00133 00134 /** 00135 A factory type intended to be subclassed to provide 00136 protocol-specific i/o streams. 00137 00138 Subclasses must reimplement the virtual functions and 00139 register with the classloader like so: 00140 00141 <pre> 00142 #define S11N_FACREG_TYPE my_subclass_type 00143 #define S11N_FACREG_INTERFACE_TYPE s11n::io::url_stream_factory 00144 #define S11N_FACREG_TYPE_NAME "my_subclass_type" 00145 #include <s11n.net/s11n/factory_reg.hpp> 00146 </pre> 00147 00148 They may also want to set up classloader aliases during 00149 the static initialization phase, as demonstrated for 00150 the file:// protocol in url.cpp. 00151 00152 Note for subclasser: NEVER EVER call 00153 s11n::io::get_i/ostream() from this class, because those 00154 functions dispatch to url_stream_factory when possible, and 00155 callint those from here can cause an endless loop. 00156 00157 */ 00158 class S11N_EXPORT_API url_stream_factory 00159 { 00160 protected: 00161 url_stream_factory() {} 00162 00163 /** 00164 Default implementation returns 0. Subclasses.should return an instance 00165 of a stream capable of writing to the given URL. On error they should 00166 return 0 or throw an exception. 00167 00168 The caller owns the returned pointer, which may be 0. 00169 */ 00170 virtual std::ostream * do_get_ostream( url_parser const & url ) const 00171 { 00172 return 0; 00173 } 00174 /** 00175 Default implementation returns 0. Subclasses.should return an instance 00176 of a stream capable of reading from the given URL. On error they should 00177 return 0 or throw an exception. 00178 00179 The caller owns the returned pointer, which may be 0. 00180 */ 00181 virtual std::istream * do_get_istream( url_parser const & url ) const 00182 { 00183 return 0; 00184 } 00185 00186 public: 00187 virtual ~url_stream_factory() {} 00188 00189 /** 00190 See do_get_ostream(). 00191 */ 00192 std::ostream * get_ostream( url_parser const & url ) const 00193 { 00194 return this->do_get_ostream( url ); 00195 } 00196 /** 00197 See do_get_istream(). 00198 */ 00199 std::istream * get_istream( url_parser const & url ) const 00200 { 00201 return this->do_get_istream( url ); 00202 } 00203 00204 /** 00205 Classloads an instance of url_stream_factory 00206 associated with the given scheme. Caller owns the 00207 returned pointer, which may be 0. 00208 00209 Subclass authors are responsible for registering their 00210 subclasses with the url_stream_factory classloader. 00211 */ 00212 static url_stream_factory * create_factory_for_scheme( std::string const & scheme ); 00213 00214 /** 00215 Registers SubclassT as a subclass of 00216 url_stream_factory such that calling 00217 create_factory_for_scheme(scheme) will return an 00218 instance of SubclassT. SubclassT must be-a 00219 url_stream_factory and must be compatible with the 00220 s11n::fac factory layer. 00221 */ 00222 template <typename SubclassT> 00223 static void register_factory_for_scheme( std::string const & scheme ) 00224 { 00225 s11n::fac::register_subtype< url_stream_factory, SubclassT >( scheme ); 00226 } 00227 00228 }; 00229 00230 /** 00231 Convenience overload. 00232 */ 00233 std::istream * get_url_istream( std::string const & url ); 00234 /** 00235 Classloads an instance of an istream, using a 00236 url_stream_factory to create the stream. Caller owns the 00237 returned pointer, which may be 0. Failure indicates one of: 00238 00239 - !url.good() 00240 00241 - no url_stream_factory was mapped to url.scheme(). 00242 00243 - The factory could not create the required stream. 00244 */ 00245 std::istream * get_url_istream( url_parser const & url ); 00246 00247 /** 00248 Convenience overload. 00249 */ 00250 std::ostream * get_url_ostream( std::string const & url ); 00251 00252 /** 00253 See get_url_istream(). 00254 */ 00255 std::ostream * get_url_ostream( url_parser const & url ); 00256 00257 /** 00258 This factory creates streams for URLs in the following format: 00259 00260 file:[//]/path/to/file 00261 00262 It works for input and output. 00263 00264 If your libs11n is configured/built with 00265 s11n_CONFIG_HAVE_ZFSTREAM set to true then the zfstream 00266 library is used to support bzip2/gzip files. 00267 */ 00268 class S11N_EXPORT_API file_stream_factory : public url_stream_factory 00269 { 00270 public: 00271 file_stream_factory(); 00272 virtual ~file_stream_factory(); 00273 00274 protected: 00275 /** 00276 Creates an ostream for a file:// URL. If your 00277 s11n is built with zfstream support, then 00278 the compressors supported by that library 00279 are supported here. 00280 00281 The caller owns the returned pointer, which may be 00282 0. 00283 */ 00284 virtual std::ostream * do_get_ostream( url_parser const & url ) const; 00285 /** 00286 Creates an istream for a file:// URL. If your 00287 s11n is built with zfstream support, then 00288 the compressors supported by that library 00289 are supported here. 00290 00291 The caller owns the returned pointer, which may be 00292 0. 00293 */ 00294 virtual std::istream * do_get_istream( url_parser const & url ) const; 00295 }; 00296 00297 }} // namespaces 00298 00299 00300 00301 #endif // s11n_io_URL_HPP_INCLUDED