url.hpp

Go to the documentation of this file.
00001 #ifndef s11n_io_URL_HPP_INCLUDED
00002 #define s11n_io_URL_HPP_INCLUDED 1
00003 
00004 #include <string>
00005 #include <s11n.net/s11n/export.hpp>
00006 #include <s11n.net/s11n/refcount.hpp>
00007 #include <s11n.net/s11n/factory.hpp>
00008 
00009 // Reminder: RFC1738: http://www.ietf.org/rfc/rfc1738.txt
00010 
00011 namespace s11n { namespace io {
00012 
00013     /**
00014        url_parser is a basic implementation for
00015        parsing a URL string into its atomic components.
00016        It is not a full-featured parser. NOTE(review): earlier text said key=value
00017        arguments are not parsed, yet args_str()/args_map() exist below -- confirm.
00018 
00019        This type uses reference-counted internal data and
00020        copy-on-write, so copying it is cheap.
00021     */
00022     class S11N_EXPORT_API url_parser
00023     {
00024     public:
00025         /**
00026            Parses the given URL. good() reveals the status of the parse.
00027            Not declared explicit, so a std::string converts implicitly.
00028         */
00029         url_parser( std::string const & );
00030         /**
00031            Creates an empty (!good()) parser.
00032         */
00033         url_parser();
00034 // Rely on the compiler-generated copy ctor and copy-assignment operator:
00035 //      url_parser( url_parser const & );
00036 //      url_parser & operator=( url_parser const & );
00037         /**
00038            Behaves the same as the string-argument ctor.
00039          */
00040         url_parser & operator=( std::string const & );
00041         ~url_parser();
00042         /**
00043            Returns true if the last parse() got a "valid" URL.
00044         */
00045         bool good() const;
00046 
00047         /**
00048            Parses URLs of the following forms:
00049 
00050            scheme://[user[:password]@]host[:[port[:]]][/path/to/resource]
00051 
00052            Note that host may have an optional ':' after it
00053            without a port number, and that a port number may be followed
00054            by an optional ':' character. This is to accommodate ssh
00055            URLs and the like:
00056 
00057            ssh://user\@host:/path
00058 
00059            ssh://user\@host:33:/path
00060 
00061            Returns the same value as good().
00062 
00063            If this function returns false then the contents of this
00064            object are in an undefined state and should not be used
00065            until a later call to parse() succeeds.
00066         */
00067         bool parse( std::string const & );
00068 
00069         /** Returns the URL most recently passed to parse(). */
00070         std::string url() const;
00071         /** Returns the scheme part of url(). */
00072         std::string scheme() const;
00073         /** Returns the user name part of url(), which may be empty. */
00074         std::string user() const;
00075         /** Returns the user password part of url(), which may be empty. */
00076         std::string password() const;
00077         /** Returns the host part of url(). */
00078         std::string host() const;
00079         /**
00080            Returns the resource path part of url(), which may be empty.
00081 
00082            Contrary to RFC1738, a leading slash in a URL *is* considered
00083            to be part of the path.
00084 
00085            In some protocols (e.g. http) an empty path can be
00086            considered the same as '/', but in others
00087            (e.g. file) such an interpretation is not appropriate.
00088         */
00089         std::string path() const;
00090 
00091         /**
00092            If the URL path has a '?' in it, anything after the '?'
00093            is assumed to be a list of arguments, such as those passed
00094            to HTTP GET requests. The returned string does not contain
00095            the leading '?'.
00096         */
00097         std::string args_str() const;
00098 
00099         typedef std::map<std::string,std::string> args_map_type; // NOTE(review): <map> is not included directly in this file; presumably an s11n header supplies it -- confirm
00100         args_map_type const & args_map() const; // undocumented in the original; presumably the key/value form of args_str() -- TODO confirm
00101 
00102         /** Returns the port number part of url(), or 0 if no port was specified. */
00103         unsigned short port() const;
00104     private:
00105         /**
00106            impl holds the private data for a url_parser.
00107 
00108            PS: it is unfortunate that this has to be in the public
00109            header, but rcptr<impl> needs impl to be a complete
00110            type.
00111 
00112            TODO: get rid of the rcptr<> usage and hide the pimpl
00113            in the implementation. Also consider storing all the
00114            string data in a single (char*), using NUL bytes to delimit
00115            it. That would save a lot of space.
00116         */
00117         struct impl
00118         {
00119             std::string url;
00120             std::string proto; // presumably the value returned by scheme() -- confirm in url.cpp
00121             std::string user;
00122             std::string pass; // presumably the value returned by password() -- confirm in url.cpp
00123             std::string host;
00124             unsigned short port;
00125             std::string path;
00126             std::string args_str;
00127             bool good;
00128             args_map_type args_map;
00129             impl();
00130         };
00131         s11n::refcount::rcptr<impl> pimpl;
00132     };
00133 
00134     /**
00135        A factory type intended to be subclassed to provide
00136        protocol-specific i/o streams.
00137 
00138        Subclasses must reimplement the virtual functions and
00139        register with the classloader like so:
00140 
00141 <pre>
00142 #define S11N_FACREG_TYPE my_subclass_type
00143 #define S11N_FACREG_INTERFACE_TYPE s11n::io::url_stream_factory
00144 #define S11N_FACREG_TYPE_NAME "my_subclass_type"
00145 #include <s11n.net/s11n/factory_reg.hpp>
00146 </pre>
00147 
00148        They may also want to set up classloader aliases during
00149        the static initialization phase, as demonstrated for
00150        the file:// protocol in url.cpp.
00151 
00152        Note for subclassers: NEVER EVER call
00153        s11n::io::get_i/ostream() from this class, because those
00154        functions dispatch to url_stream_factory when possible, and
00155        calling those from here can cause an endless loop.
00156        NOTE(review): std::istream/std::ostream are used below without a direct stream include in this file -- confirm.
00157     */
00158     class S11N_EXPORT_API url_stream_factory
00159     {
00160     protected:
00161         url_stream_factory() {}
00162 
00163         /**
00164            Default implementation returns 0. Subclasses should return an instance
00165            of a stream capable of writing to the given URL. On error they should
00166            return 0 or throw an exception.
00167 
00168            The caller owns the returned pointer, which may be 0.
00169         */
00170         virtual std::ostream * do_get_ostream( url_parser const & url ) const
00171         {
00172             return 0; // the default ignores url
00173         }
00174         /**
00175            Default implementation returns 0. Subclasses should return an instance
00176            of a stream capable of reading from the given URL. On error they should
00177            return 0 or throw an exception.
00178 
00179            The caller owns the returned pointer, which may be 0.
00180         */
00181         virtual std::istream * do_get_istream( url_parser const & url ) const
00182         {
00183             return 0; // the default ignores url
00184         }
00185 
00186     public:
00187         virtual ~url_stream_factory() {}
00188 
00189         /**
00190            Forwards to do_get_ostream() and returns its result.
00191         */
00192         std::ostream * get_ostream( url_parser const & url ) const
00193         {
00194             return this->do_get_ostream( url );
00195         }
00196         /**
00197            Forwards to do_get_istream() and returns its result.
00198         */
00199         std::istream * get_istream( url_parser const & url ) const
00200         {
00201             return this->do_get_istream( url );
00202         }
00203 
00204         /**
00205            Classloads an instance of url_stream_factory
00206            associated with the given scheme. Caller owns the
00207            returned pointer, which may be 0.
00208 
00209            Subclass authors are responsible for registering their
00210            subclasses with the url_stream_factory classloader.
00211         */
00212         static url_stream_factory * create_factory_for_scheme( std::string const & scheme );
00213 
00214         /**
00215            Registers SubclassT as a subclass of
00216            url_stream_factory such that calling
00217            create_factory_for_scheme(scheme) will return an
00218            instance of SubclassT. SubclassT must be a
00219            url_stream_factory and must be compatible with the
00220            s11n::fac factory layer.
00221         */
00222         template <typename SubclassT>
00223         static void register_factory_for_scheme( std::string const & scheme )
00224         {
00225             s11n::fac::register_subtype< url_stream_factory, SubclassT >( scheme );
00226         }
00227 
00228     };
00229 
00230     /**
00231        Convenience overload taking the URL as a string; see the url_parser overload.
00232     */
00233     std::istream * get_url_istream( std::string const & url );
00234     /**
00235        Classloads an instance of an istream, using a
00236        url_stream_factory to create the stream. Caller owns the
00237        returned pointer, which may be 0. Failure indicates one of:
00238 
00239        - !url.good()
00240 
00241        - no url_stream_factory was mapped to url.scheme().
00242 
00243        - The factory could not create the required stream.
00244     */
00245     std::istream * get_url_istream( url_parser const & url ); // NOTE(review): no S11N_EXPORT_API here, unlike the classes in this header -- confirm intended
00246 
00247     /**
00248        Convenience overload taking the URL as a string; see the url_parser overload.
00249     */
00250     std::ostream * get_url_ostream( std::string const & url );
00251 
00252     /**
00253        Output-stream counterpart of get_url_istream(); see that function for details.
00254     */
00255     std::ostream * get_url_ostream( url_parser const & url ); // NOTE(review): no S11N_EXPORT_API here, unlike the classes in this header -- confirm intended
00256 
00257     /**
00258        This factory creates streams for URLs in the following format:
00259 
00260        file:[//]/path/to/file
00261 
00262        It works for both input and output.
00263 
00264        If your libs11n is configured/built with
00265        s11n_CONFIG_HAVE_ZFSTREAM set to true then the zfstream
00266        library is used to support bzip2/gzip files.
00267     */
00268     class S11N_EXPORT_API file_stream_factory : public url_stream_factory
00269     {
00270     public:
00271         file_stream_factory();
00272         virtual ~file_stream_factory();
00273 
00274     protected:
00275         /**
00276            Creates an ostream for a file:// URL. If your
00277            s11n is built with zfstream support, then
00278            the compressors supported by that library
00279            are supported here.
00280 
00281            The caller owns the returned pointer, which may be
00282            0.
00283         */
00284         virtual std::ostream * do_get_ostream( url_parser const & url ) const; // overrides url_stream_factory::do_get_ostream()
00285         /**
00286            Creates an istream for a file:// URL. If your
00287            s11n is built with zfstream support, then
00288            the compressors supported by that library
00289            are supported here (implementation not shown here -- confirm in url.cpp).
00290 
00291            The caller owns the returned pointer, which may be
00292            0.
00293         */
00294         virtual std::istream * do_get_istream( url_parser const & url ) const; // overrides url_stream_factory::do_get_istream()
00295     };
00296 
00297 }} // namespaces
00298 
00299 
00300 
00301 #endif // s11n_io_URL_HPP_INCLUDED

Generated on Wed Jun 4 21:44:19 2008 for libs11n by  doxygen 1.5.3