data_node_io.hpp

Go to the documentation of this file.
00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED
00002 #define s11n_DATA_NODE_IO_H_INCLUDED
00003 
00004 ////////////////////////////////////////////////////////////////////////
00005 // data_node_io.hpp
00006 // some i/o interfaces & helpers for s11n
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <sstream>
00014 #include <list>
00015 #include <map>
00016 #include <deque>
00017 #include <iostream>
00018 #include <memory>// auto_ptr
00019 
00020 #include <cassert>
00021 #include <typeinfo>
00022 
00023 
00024 
00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs
00026 
00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class
00028 
00029 #include <s11n.net/s11n/exception.hpp>
00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR
00031 #include <s11n.net/s11n/classload.hpp> // classloader()
00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep
00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits
00034 
00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL
00036 
00037 ////////////////////////////////////////////////////////////////////////////////
00038 // NO DEPS ON s11n_node.hpp ALLOWED!
00039 ////////////////////////////////////////////////////////////////////////////////
00040 
00041 
00042 #define s11n_SERIALIZER_ENABLE_INTERACTIVE 0 /* an experiment. */
00043 
00044 namespace s11n {
00045 
00046         namespace io {
00047 
00048         /**
00049            Returns an output stream for the given file
00050            name. Caller owns the returned pointer, which may
00051            be 0.
00052 
00053            As of version 1.3.0, this function supports URLs.
00054            URLs will be dispatched to get_url_ostream().
00055 
00056            When passed a non-URL filename, the returned stream
00057            may be a compressing (zlib/gzip or bz2lib) stream if
00058            your libs11n is built with libs11n_zfstream support
00059            and zfstream::compression_policy() is set to enable it.
00060         */
00061         std::ostream * get_ostream( const std::string name );
00062 
00063         /**
00064            Returns an input stream for the given file
00065            name. Caller owns the returned pointer, which may
00066            be 0.
00067 
00068            As of version 1.3.0, this function supports URLs.
00069            URLs are dispatched to the input-stream counterpart
00070            of get_url_ostream() (presumably get_url_istream();
00071            NOTE(review): confirm the name against url.hpp).
00072 
00073            When passed a non-URL filename the returned stream
00074            supports zlib and bz2lib if your libs11n is built
00075            with libs11n_zfstream support, meaning it can read
00076            files compressed with zlib/gzip or bz2lib.
00077            In version 1.3.0 the 2nd parameter (ExternalData)
00078            was removed because it is never used.
00079         */
00080         std::istream * get_istream( const std::string name );
00081 
00082                 /**
00083                    Convenience function for grabbing the first line of a file.
00084                    src is taken to be the name of the file to read.
00085                    See get_magic_cookie( istream & ) for notes on a
00086                    minor functional change introduced in version
00087                    1.2.1.
00088 
00089                    Changed in 1.3.0: second parameter to this function
00090                    (bool ExternalData) was removed.
00091                 */
00092                 std::string get_magic_cookie( const std::string & src );
00093 
00094                 /**
00095                    Convenience function for grabbing the first line of a
00096                    stream.
00097 
00098                    Returns the first line of the given stream, or an
00099                    empty string on error.
00100 
00101                    As of version 1.2.1, this function behaves slightly
00102                    differently than prior versions: the returned
00103                    string will be the first consecutive non-control
00104                    characters in the line.  This allows us to properly
00105                    read some binary formats which use a string
00106                    identifier as a magic cookie (e.g. sqlite
00107                    databases). In this context "control characters"
00108                    are anything outside the range of ASCII values
00109                    [32..126]. This change "shouldn't" affect any
00110                    pre-1.2.1 behaviours, which were never tested/used
00111                    with binary file formats.
00112                 */
00113                 std::string get_magic_cookie( std::istream & is );
00114 
00115 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00116         struct progress_reporter // experimental: abstract callback interface for progress feedback
00117         {
00118             progress_reporter() {}
00119             virtual ~progress_reporter() {} // virtual so implementations can be destroyed through this interface
00120             virtual void operator()( size_t pos, size_t total ) = 0; // invoked as (pos, total) by data_node_serializer::progress()
00121         };
00122 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00123 
00124 
00125                 /**
00126                    data_node_serializer provides an interface for
00127                    saving/loading a given abstract data node type
00128                    to/from streams.
00129 
00130                    It is designed for containers which comply with
00131                    s11n's Data Node interface and conventions.
00132 
00133 
00134                    Conventions:
00135 
00136                    Must provide:
00137 
00138                    typedef NodeT node_type
00139 
00140                    Two de/serialize functions, following the
00141                    stream-based interface shown here (filename-based
00142                    variants are optional, but convenient for clients).
00143 
00144 
00145                    Potential TODOs for 1.3/1.4:
00146 
00147                    - Add cancel() and cancelled() to set/query the
00148                    read state. This is to support Cancel operations in
00149                    UIs which load slow-loading (sqlite3) formats or
00150                    large sets and want to safely cancel. Once
00151                    cancelled, a read is not restartable (or this is
00152                    not guaranteed). All we can really do is provide a
00153                    flag and conventions and hope implementations
00154                    respect them. (An experimental version is compiled
00155                    in when s11n_SERIALIZER_ENABLE_INTERACTIVE is non-zero.)
00156                    - Provide some sort of progress feedback mechanism,
00157                    at least for reading, again to support users of
00158                    slow Serializers. This is complicated by the
00159                    unknown-read-size nature of Serializers.
00160                 */
00161                 template <typename NodeT>
00162                 class S11N_EXPORT_API data_node_serializer
00163                 {
00164                 public:
00165                         /**
00166                            The underlying data type used to store
00167                            serialized data.
00168                         */
00169                         typedef NodeT node_type;
00170                         /** Installs a placeholder cookie and names the metadata node. */
00171                         data_node_serializer()
00172                         {
00173                                 this->magic_cookie( "WARNING: magic_cookie() not set!" );
00174                                 // ^^^ subclasses must do this.
00175                                 typedef ::s11n::node_traits<node_type> NTR;
00176                                 NTR::name( this->metadata(), "serializer_metadata" );
00177 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00178                                 this->m_cancelled = false; // no cancel requested yet
00179                                 this->m_prog = 0;          // no reporter attached; progress() checks for 0
00180 #endif
00181                                 using namespace s11n::debug;
00182                                 S11N_TRACE(TRACE_CTOR) << "data_node_serializer()\n";
00183                         }
00184                         virtual ~data_node_serializer()
00185                         {
00186                                 using namespace s11n::debug;
00187                                 S11N_TRACE(TRACE_DTOR) << "~data_node_serializer() ["<<this->magic_cookie()<<"]\n";
00188                         }
00189 
00190                         /**
00191                            A convenience typedef, mainly for subclasses.
00192                         */
00193                         typedef std::map<std::string,std::string> translation_map;
00194 
00195                         /**
00196                            Returns a map intended for use with
00197                            ::s11n::io::strtool::translate_entities().
00198 
00199                            The default implementation returns an empty map.
00200 
00201                            Subclasses should override this to return a translation
00202                            map, if they need one. The default map is empty.
00203 
00204                            Be aware that this may very well be called
00205                            post-main(), so subclasses should take that into
00206                            account and provide post-main()-safe maps!
00207                         */
00208                         virtual const translation_map & entity_translations() const
00209                         {
00210                                 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap;
00211                                 return TMap::instance();
00212                         }
00213 
00214 
00215 
00216                         /**
00217                            Must be implemented to format node_type to the given ostream.
00218 
00219                            It should return true on success, false on error.
00220 
00221                            The default implementation always returns false.
00222 
00223                            Note that this function does not use
00224                            s11n::serialize() in any way, and is only
00225                            coincidentally related to it.
00226                         */
00227                         virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ )
00228                         {
00229                                 return false;
00230                         }
00231 
00232                         /**
00233                            Overloaded to save src to the given filename.
00234 
00235                            The default implementation treats destfile
00236                            as a file name and passes the call on to
00237                            serialize(node_type,ostream).  The output
00238                            file is compressed if zfstream::compression_policy()
00239                            has been set to enable it.
00240 
00241                            Returns true on success, false on error.
00242 
00243                            This function is virtual so that
00244                            Serializers which do not deal with
00245                            i/ostreams (e.g., those which use a
00246                            database connection) can override it to
00247                            interpret destfile as, e.g., a
00248                            database-related string (e.g., connection,
00249                            db object name, or whatever).
00250 
00251                            Fixed in 1.0.2: returns false when destfile
00252                            is empty.
00253 
00254                            Fixed in 1.2.6: if this->serialize(src,outstream) throws then
00255                            this function no longer leaks.
00256                         */
00257                         virtual bool serialize( const node_type & src, const std::string & destfile )
00258                         {
00259                                 if( destfile.empty() ) return false;
00260                                 s11n::Detail::auto_ptr<std::ostream> os( ::s11n::io::get_ostream( destfile ) );
00261                                 if( ! os.get() ) return false;
00262                                 // The stream is held by the auto_ptr so it is not leaked if serialize() throws.
00263                                 return this->serialize( src, *os );
00264                         }
00265 
00266                         /**
00267                            Must be implemented to parse a node_type from the given istream.
00268 
00269                            It should return a new node on success, 0 on error.
00270 
00271                            The default implementation always returns 0 and does nothing.
00272 
00273                            Note that this function does not use
00274                            s11n::deserialize() in any way, and is only
00275                            coincidentally related to it.
00276 
00277                            Subclasses should try not to have to buffer
00278                            the whole stream before parsing, because
00279                            object trees can be arbitrarily large and a
00280                            buffered copy effectively doubles the
00281                            memory needed to store the tree during the
00282                            deserialization process. Buffering
00283                            behaviour is unspecified by this interface,
00284                            however, and subclasses may pre-buffer the
00285                            whole stream content if they need to.
00286                         */
00287                         virtual node_type * deserialize( std::istream & )
00288                         {
00289                                 return 0;
00290                         }
00291 
00292 
00293                         /**
00294                            Overloaded to load a node from the given filename.
00295 
00296                            It supports zlib/bz2lib decompression for
00297                            files if your s11n lib supports them.
00298 
00299                            This is virtual for the same reason as
00300                            serialize(string).
00301                            Returns 0 if no input stream can be opened for src.
00302                         */
00303                         virtual node_type * deserialize( const std::string & src )
00304                         {
00305                                 typedef s11n::Detail::auto_ptr<std::istream> AP;
00306                                 AP is( ::s11n::io::get_istream( src ) );
00307                                 if( ! is.get() ) return 0;
00308                                 return this->deserialize( *is );
00309                         }
00310 
00311 
00312                         /**
00313                            Gets this object's magic cookie.
00314 
00315                            Cookies are registered with
00316                            <code>class_loader< data_node_serializer<NodeType> ></code>
00317                            types to map files to file input parsers.
00318                         */
00319                         std::string magic_cookie() const
00320                         {
00321                                 return this->m_cookie;
00322                         }
00323 
00324 
00325 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00326                         bool is_cancelled() const { return m_cancelled; }
00327                         void cancel() { this->m_cancelled = true; }
00328                         // The overloads below attach p for the duration of one call and always detach it again.
00329                         node_type * deserialize( std::string const & src, progress_reporter & p )
00330                         {
00331                                 this->m_prog = &p;
00332                                 node_type * n = 0;
00333                                 try
00334                                 {
00335                                         n = this->deserialize( src );
00336                                         this->m_prog = 0;
00337                                 }
00338                                 catch(...)
00339                                 {
00340                                         this->m_prog = 0;
00341                                         throw;
00342                                 }
00343                                 return n;
00344                         }
00345 
00346                         node_type * deserialize( std::istream & src, progress_reporter & p )
00347                         {
00348                                 this->m_prog = &p;
00349                                 node_type * n = 0;
00350                                 try
00351                                 {
00352                                         n = this->deserialize( src );
00353                                         this->m_prog = 0;
00354                                 }
00355                                 catch(...)
00356                                 {
00357                                         this->m_prog = 0;
00358                                         throw;
00359                                 }
00360                                 return n;
00361                         }
00362 
00363                         bool serialize( const node_type & src, std::ostream & dest, progress_reporter & p )
00364                         {
00365                                 this->m_prog = &p;
00366                                 bool b = false;
00367                                 try
00368                                 {
00369                                         b = this->serialize( src, dest );
00370                                         this->m_prog = 0;
00371                                 }
00372                                 catch(...)
00373                                 {
00374                                         this->m_prog = 0;
00375                                         throw;
00376                                 }
00377                                 return b;
00378                         }
00379 
00380                         bool serialize( const node_type & src, std::string const & dest, progress_reporter & p )
00381                         {
00382                                 this->m_prog = &p;
00383                                 bool b = false;
00384                                 try
00385                                 {
00386                                         b = this->serialize( src, dest );
00387                                         this->m_prog = 0;
00388                                 }
00389                                 catch(...)
00390                                 {
00391                                         this->m_prog = 0;
00392                                         throw;
00393                                 }
00394                                 return b;
00395                         }
00396 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00397 
00398                 protected:
00399                         /**
00400                            Sets the magic cookie for this type.
00401                         */
00402                         void magic_cookie( const std::string & c )
00403                         {
00404                                 this->m_cookie = c;
00405                         }
00406 
00407                         /**
00408                            metadata is an experimental feature
00409                            allowing serializers to store arbitrary
00410                            serializer-specific information in their
00411                            data streams.
00412                          */
00413                         node_type & metadata()
00414                         { return this->m_meta; }
00415                         /**
00416                            A const overload of metadata().
00417                          */
00418                         const node_type & metadata() const
00419                         { return this->m_meta;}
00420 
00421 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00422                         void progress( size_t pos, size_t total ) // forwards to the attached reporter, if any
00423                         {
00424                                 if( this->m_prog )
00425                                 {
00426                                         this->m_prog->operator()( pos, total );
00427                                 }
00428                         }
00429                         void clear_cancel() { this->m_cancelled = false; }
00430                         void assert_not_cancelled() // throws s11n_exception once cancel() has been called
00431                         {
00432                                 if( this->is_cancelled() )
00433                                 {
00434                                         throw ::s11n::s11n_exception("Serializer operation was cancelled.");
00435                                 }
00436                         }
00437 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00438 
00439                 private:
00440                         std::string m_cookie;
00441                         node_type m_meta;
00442 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00443                         bool m_cancelled;
00444                         progress_reporter * m_prog;
00445 #endif
00446                 }; // data_node_serializer<>
00447 
00448         /**
00449            Tries to guess which Serializer can be used to read
00450            is. Returns an instance of that type on success or
00451            0 on error. The caller owns the returned object. It
00452            may propagate exceptions.
00453 
00454            Achtung: the first line of input from the input
00455            stream is consumed by this function (to find the
00456            cookie), which means that if this stream is handed
00457            off to the object which this function returns, it
00458            won't get its own cookie.  The only reliable way
00459            around this [that i know of] is to buffer the whole
00460            input as a string, and i don't wanna do that (it's
00461            really bad for massive data files).
00462 
00463            Special feature:
00464 
00465            If the first line of the stream is
00466            "#s11n::io::serializer CLASSNAME" or
00467            "#!/s11n/io/serializer CLASSNAME" then the CLASSNAME
00468            token is expected to be a Serializer class
00469            name. This function will try to classload that
00470            object.  If successful it will use that type to
00471            deserialize the input stream. If that fails, it
00472            will return 0.  The intention of this feature is to
00473            simplify creation of non-C++ tools which generate
00474            s11n data (e.g., perl scripts), so that they don't
00475            need to know the exact cookies.
00476 
00477            Added in 1.2.1.
00478         */
00479         template <typename NodeType>
00480         data_node_serializer<NodeType> *
00481         guess_serializer( std::istream & is )
00482         {
00483             typedef data_node_serializer<NodeType> SerializerT;
00484             // Reading the cookie consumes the first line of the stream.
00485             const std::string cookie( get_magic_cookie( is ) );
00486             if( cookie.empty() )
00487             {
00488                 return 0;
00489             }
00490             // Look for a "use this class" directive, original spelling first.
00491             std::string prefix( "#s11n::io::serializer " );
00492             std::string::size_type pos = cookie.find( prefix );
00493             if( std::string::npos == pos )
00494             { // alternate spelling, added in 1.1.0:
00495                 prefix = "#!/s11n/io/serializer ";
00496                 pos = cookie.find( prefix );
00497             }
00498             // A directive only counts at the very start of the line.
00499             // Anything else means the whole cookie is the classloader key.
00500             if( 0 != pos )
00501             {
00502                 return ::s11n::cl::classload<SerializerT>( cookie );
00503             }
00504             // The text following the directive names the Serializer class.
00505             const std::string classname( cookie.substr( prefix.size() ) );
00506             return ::s11n::cl::classload<SerializerT>( classname );
00507         }
00508 
00509         /**
00510            An overload which assumes infile is a local file: it is
00511            opened via get_istream() and the stream overload does the
00512            rest. Returns 0 if no stream is available. Added in 1.2.1.
00513         */
00514         template <typename NodeType>
00515         data_node_serializer<NodeType> *
00516         guess_serializer( std::string const & infile )
00517         {
00518             typedef s11n::Detail::auto_ptr<std::istream> StreamPtr;
00519             StreamPtr in( get_istream( infile.c_str() ) );
00520             if( ! in.get() ) return 0;
00521             return guess_serializer<NodeType>( *in );
00522         }
00523 
00524                 /**
00525                    Tries to load a NodeType object from the given
00526                    stream. The cookie of the input stream (the first
00527                    line) is handed to guess_serializer(), which uses
00528                    s11n::cl::classload<SerializerBaseType>() to find a
00529                    matching Serializer; that Serializer then reads the
00530                    stream.
00531 
00532                    On error 0 is returned or an exception is thrown,
00533                    else a new pointer, which the caller owns.
00534 
00535                    ACHTUNG: Only usable for loading ROOT nodes.
00536 
00537                    See guess_serializer( std::istream & is ) for more
00538                    information, as that function is used to dispatch the
00539                    stream.
00540 
00541                 */
00542                 template <typename NodeType>
00543                 NodeType * load_node( std::istream & is )
00544                 {
00545                         typedef data_node_serializer<NodeType> SerializerT;
00546                         s11n::Detail::auto_ptr<SerializerT> loader( guess_serializer<NodeType>( is ) );
00547                         // No Serializer could be found for the cookie.
00548                         if( ! loader.get() ) return 0;
00549                         return loader->deserialize( is );
00550                 }
00551 
00552                 /**
00553                    Similar to load_node( istream ), overloaded to take
00554                    a filename. This is handled separately from the
00555                    stream overload because some Serializers must
00556                    behave differently in the face of streams. e.g.,
00557                    db-based Serializers typically can't deal with
00558                    streams.
00559 
00560                    ACHTUNG: Only usable for loading ROOT nodes.
00561 
00562                    Returns 0 if no Serializer can be found for src.
00563                    Added in 1.2.1.
00564 
00565                    Behaviour change in 1.3.0: The second argument to
00566                    this function (bool ExternalData) was removed.
00567                 */
00568                 template <typename NodeType>
00569                 NodeType * load_node( const std::string & src )
00570                 {
00571                         typedef data_node_serializer<NodeType> SerializerT;
00572                         s11n::Detail::auto_ptr<SerializerT> loader( guess_serializer<NodeType>( src ) );
00573                         // The Serializer gets the name itself, not an already-open stream.
00574                         if( ! loader.get() ) return 0;
00575                         return loader->deserialize( src );
00576                 }
00577 
00578                 /**
00579                    Tries to load a SerializableT from the given stream:
00580                    a root NodeT is read with load_node() and then passed
00581                    to s11n::deserialize(). Returns a new object, else 0.
00582                    The caller owns the returned pointer.
00583 
00584                    ACHTUNG: Only usable for loading ROOT nodes.
00585                 */
00586                 template <typename NodeT,typename SerializableT>
00587                 SerializableT * load_serializable( std::istream & src )
00588                 {
00589                         // The node is only an intermediary held for this call.
00590                         s11n::Detail::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00591                         if( root.get() )
00592                         {
00593                                 return ::s11n::deserialize<NodeT,SerializableT>( *root );
00594                         }
00595                         return 0;
00596                 }
00597 
00598                 /**
00599                    An overloaded form which takes an input string. The
00600                    string is passed on to load_node( const std::string & ),
00601                    and the resulting node is given to s11n::deserialize().
00602                    Returns a new object, which the caller owns, else 0.
00603                    ACHTUNG: Only usable for loading ROOT nodes.
00604 
00605                    Changed in 1.3.0: the second parameter to
00606                    this function (bool ExternalData) was removed.
00607                 */
00608                 template <typename NodeT,typename SerializableT>
00609                 SerializableT * load_serializable( const std::string & src )
00610                 {
00611                         // The node is only an intermediary held for this call.
00612                         s11n::Detail::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00613                         if( root.get() )
00614                         {
00615                                 return ::s11n::deserialize<NodeT,SerializableT>( *root );
00616                         }
00617                         return 0;
00618                 }
00619 
00620         } // namespace io
00621 
00622 } // namespace s11n
00623 
00624 #endif // s11n_DATA_NODE_IO_H_INCLUDED

Generated on Wed Jun 4 21:44:19 2008 for libs11n by  doxygen 1.5.3