Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

data_node_io.hpp

Go to the documentation of this file.
00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED
00002 #define s11n_DATA_NODE_IO_H_INCLUDED
00003 
00004 ////////////////////////////////////////////////////////////////////////
00005 // data_node_io.hpp
00006 // some i/o interfaces & helpers for s11n
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <sstream>
00014 #include <list>
00015 #include <map>
00016 #include <deque>
00017 #include <iostream>
00018 #include <memory>// auto_ptr
00019 
00020 #include <cassert>
00021 #include <typeinfo>
00022 
00023 
00024 
00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs
00026 
00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class
00028 
00029 #include <s11n.net/s11n/exception.hpp>
00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR
00031 #include <s11n.net/s11n/classload.hpp> // classloader()
00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep
00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits
00034 
00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL
00036 
00037 ////////////////////////////////////////////////////////////////////////////////
00038 // NO DEPS ON s11n_node.hpp ALLOWED!
00039 ////////////////////////////////////////////////////////////////////////////////
00040 
00041 
00042 #define s11n_SERIALIZER_ENABLE_INTERACTIVE 0 /* an experiment. */
00043 
00044 namespace s11n {
00045 
00046         namespace io {
00047 
00048         /**
00049            Returns an output stream for the given file
00050            name. Caller owns the returned pointer, which may
00051            be 0.
00052 
00053            The returned stream supports libz and libbz2 if your
00054            libs11n is built with libs11n_zfstream support, meaning
00055            it can write files compressed with zlib/gzip or bz2lib.
00056         */
00057         std::ostream * get_ostream( const std::string name );
00058 
00059         /**
00060            Returns an input stream for the given file
00061            name. Caller owns the returned pointer, which may
00062            be 0.
00063 
00064            The returned stream supports libz and libbz2 if
00065            your libs11n is built with libs11n_zfstream
00066            support. That means that if
00067            zfstream::compression_policy() is set, then the
00068            returned stream might be a decompressing stream.
00069 
00070            If ExternalData is false then name is assumed to be
00071            a string containing input, and a string-reading stream
00072            is returned.
00073         */
00074         std::istream * get_istream( const std::string name, bool ExternalData = true );
00075 
00076                 /**
00077                    Convenience function for grabbing the first line of a file.
00078 
00079                    If ExternalData == true then returns the first line of the
00080                    file, else returns up to the first newline of src.
00081 
00082            See get_magic_cookie( istream & ) for notes on a minor functional
00083            change introduced in version 1.2.1.
00084                 */
00085                 std::string get_magic_cookie( const std::string & src, bool ExternalData = true );
00086 
00087                 /**
00088                    Convenience function for grabbing the first line of a
00089                    stream.
00090 
00091                    Returns the first line of the given stream, or an
00092                    empty string on error.
00093 
00094            As of version 1.2.1, this function behaves slightly
00095            differently than prior versions: the returned
00096            string will be the first consecutive non-control
00097            characters in the line.  This allows us to properly
00098            read some binary formats which use a string
00099            identifier as a magic cookie (e.g.  sqlite
00100            databases). In this context "control characters"
00101            are anything outside the range of ASCII values
00102            [32..126]. This change "shouldn't" affect any
00103            pre-1.2.1 behaviours, which were never tested/used
00104            with binary file formats.
00105                 */
00106                 std::string get_magic_cookie( std::istream & is );
00107 
00108 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
/**
   Abstract callback interface for the experimental
   "interactive" Serializer mode (only compiled in when
   s11n_SERIALIZER_ENABLE_INTERACTIVE is non-zero).

   Implementations override operator()(pos,total), which takes
   a position and a total. Both values are passed through
   untouched; their unit is whatever the calling Serializer
   chooses to report.
*/
struct progress_reporter
{
    /** Does nothing. */
    progress_reporter()
    {
    }

    /** Virtual so that implementations can be destroyed via this base type. */
    virtual ~progress_reporter()
    {
    }

    /** Progress notification hook. Must be implemented by subclasses. */
    virtual void operator()( size_t pos, size_t total ) = 0;
};
00115 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00116 
00117 
00118                 /**
00119                    data_node_serializer provides an interface for
00120                    saving/loading a given abstract data node type
00121                    to/from streams.
00122 
00123                    It is designed for containers which comply with
00124                    s11n's Data Node interface and conventions.
00125 
00126 
00127                    Conventions:
00128 
00129                    Must provide:
00130 
00131                    typedef NodeT node_type
00132 
00133                    Two de/serialize functions, following the
00134                    stream-based interface shown here (filename-based
00135                    variants are optional, but convenient for clients).
00136 
00137 
00138            Potential TODOs for 1.3/1.4:
00139 
00140            - Add cancel() and cancelled() to set/query the
00141            read state. This is to support Cancel operations in
00142            UIs which load slow-loading (sqlite3) formats or
00143            large sets and want to safely cancel. Once
00144            cancelled, a read is not restartable (or this is
00145            not guaranteed). All we can really do is provide a
00146            flag and conventions and hope implementations
00147            respect them.
00148 
00149            - Provide some sort of progress feedback mechanism,
00150            at least for reading, again to support users of
00151            slow Serializers. This is complicated by the
00152            unknown-read-size nature of Serializers.
00153                 */
00154                 template <typename NodeT>
00155                 class S11N_EXPORT_API data_node_serializer
00156                 {
00157                 public:
00158                         /**
00159                            The underlying data type used to store
00160                            serialized data.
00161                         */
00162                         typedef NodeT node_type;
00163 
00164 
00165                         data_node_serializer()
00166                         {
00167                                 this->magic_cookie( "WARNING: magic_cookie() not set!" );
00168                                 // ^^^ subclasses must do this.
00169                 typedef ::s11n::node_traits<node_type> NTR;
00170                 NTR::name( this->metadata(), "serializer_metadata" );
00171                                 // this->metadata().name( "serializer_metadata" );
00172                         };
00173                         virtual ~data_node_serializer(){};
00174 
00175 
00176                         /**
00177                            A convenience typedef, mainly for subclasses.
00178                         */
00179                         typedef std::map<std::string,std::string> translation_map;
00180 
00181                         /**
00182                            Returns a map intended for use with
00183                            ::s11n::io::strtool::translate_entities().
00184                            
00185                            The default implementation returns an empty map.
00186                            
00187                            Subclasses should override this to return a translation
00188                            map, if they need one. The default map is empty.
00189 
00190                            Be aware that this may very well be called
00191                            post-main(), so subclasses should take that into
00192                            account and provide post-main()-safe maps!
00193                         */
00194                         virtual const translation_map & entity_translations() const
00195                         {
00196                                 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap;
00197                                 return TMap::instance();
00198                         }
00199 
00200 
00201 
00202                         /**
00203                            Must be implemented to format node_type to the given ostream.
00204 
00205                            It should return true on success, false on error.
00206 
00207                            The default implementation always returns false.
00208 
00209                            Note that this function does not use
00210                            s11n::serialize() in any way, and is only
00211                            coincidentally related to it.
00212                         */
00213                         virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ )
00214                         {
00215                                 return false;
00216                         }
00217 
00218                        /**
00219                            Overloaded to save dest to the given filename.
00220 
00221                            The default implementation treats destfile
00222                            as a file name and passes the call on to
00223                            serialize(node_type,ostream).  The output
00224                            file is compressed if zfstream::compression_policy()
00225                            has been set to enable it.
00226 
00227                            Returns true on success, false on error.
00228 
00229                            This function is virtual so that
00230                            Serializers which do not deal with
00231                            i/ostreams (e.g., those which use a
00232                            database connection) can override it to
00233                            interpret destfile as, e.g., a
00234                            database-related string (e.g., connection,
00235                            db object name, or whatever).
00236 
00237                Fixed in 1.0.2: returns false when destfile
00238                is empty.
00239                         */
00240                         virtual bool serialize( const node_type & src, const std::string & destfile )
00241                         {
00242                 if( destfile.empty() ) return false;
00243                                 std::ostream * os = ::s11n::io::get_ostream( destfile );
00244                                 if( ! os ) return false;
00245                                 bool b = this->serialize( src, *os );
00246                                 delete( os );
00247                                 return b;
00248                         }
00249 
00250                         /**
00251                            Must be implemented to parse a node_type from the given istream.
00252 
00253                            It should return true on success, false on error.
00254 
00255                            The default implementation always returns 0 and does nothing.
00256 
00257                            Note that this function does not use
00258                            s11n::deserialize() in any way, and is only
00259                            coincidentally related to it.
00260 
00261                Subclasses should try not to have to buffer
00262                the whole stream before parsing, because
00263                object trees can be arbitrarily large and a
00264                buffered copy effectively doubles the
00265                memory needed to store the tree during the
00266                deserialization process. Buffering
00267                behaviour is unspecified by this interface,
00268                however, and subclasses may pre-buffer the
00269                whole stream content if they need to.
00270                         */
00271                         virtual node_type * deserialize( std::istream & )
00272                         {
00273                                 return 0;
00274                         }
00275 
00276 
00277                         /**
00278                            Overloaded to load dest from the given filename.
00279 
00280                            It supports zlib/bz2lib decompression for
00281                            files if your s11n lib supports them.
00282 
00283                            This is virtual for the same reason as
00284                            serialize(string).
00285 
00286                         */
00287                         virtual node_type * deserialize( const std::string & src )
00288                         {
00289                                 typedef std::auto_ptr<std::istream> AP;
00290                                 AP is = AP( ::s11n::io::get_istream( src ) );
00291                                 if( ! is.get() ) return 0;
00292                                 return this->deserialize( *is );
00293                         }
00294 
00295 
00296                         /**
00297                            Gets this object's magic cookie.
00298 
00299                            Cookies are registered with
00300                            <code>class_loader< data_node_serializer<NodeType> ></code>
00301                            types to map files to file input parsers.
00302                         */
00303                         std::string magic_cookie() const
00304                         {
00305                                 return this->m_cookie;
00306                         }
00307 
00308 
00309 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00310             bool is_cancelled() const { return m_cancelled; }
00311             void cancel() { this->m_cancelled = true; }
00312 
00313             node_type * deserialize( std::string const & src, progress_reporter & p )
00314             {
00315                 this->m_prog = &p;
00316                 node_type * n = 0;
00317                 try
00318                 {
00319                     n = this->deserialize( src );
00320                     this->m_prog = 0;
00321                 }
00322                 catch(...)
00323                 {
00324                     this->m_prog = 0;
00325                     throw;
00326                 }
00327                 return n;
00328             }
00329 
00330             node_type * deserialize( std::istream & src, progress_reporter & p )
00331             {
00332                 this->m_prog = &p;
00333                 node_type * n = 0;
00334                 try
00335                 {
00336                     n = this->deserialize( src );
00337                     this->m_prog = 0;
00338                 }
00339                 catch(...)
00340                 {
00341                     this->m_prog = 0;
00342                     throw;
00343                 }
00344                 return n;
00345             }
00346 
00347                         bool serialize( const node_type & src, std::ostream & dest, progress_reporter & p )
00348                         {
00349                 this->m_prog = &p;
00350                 bool b = false;
00351                 try
00352                 {
00353                     b = this->serialize( src, dest );
00354                     this->m_prog = 0;
00355                 }
00356                 catch(...)
00357                 {
00358                     this->m_prog = 0;
00359                     throw;
00360                 }
00361                 return b;
00362                         }
00363 
00364                         bool serialize( const node_type & src, std::string const & dest, progress_reporter & p )
00365                         {
00366                 this->m_prog = &p;
00367                 bool b = false;
00368                 try
00369                 {
00370                     b = this->serialize( src, dest );
00371                     this->m_prog = 0;
00372                 }
00373                 catch(...)
00374                 {
00375                     this->m_prog = 0;
00376                     throw;
00377                 }
00378                 return b;
00379                         }
00380 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00381 
00382                 protected:
00383                         /**
00384                            Sets the magic cookie for this type.
00385                         */
00386                         void magic_cookie( const std::string & c )
00387                         {
00388                                 this->m_cookie = c;
00389                         }
00390 
00391                         /**
00392                            metadata is an experimental feature
00393                            allowing serializers to store arbitrary
00394                            serializer-specific information in their
00395                            data steams.
00396                          */
00397                         node_type & metadata()
00398                         { return this->m_meta; }
00399                         /**
00400                            A const overload of metadata().
00401                          */
00402                         const node_type & metadata() const
00403                         { return this->m_meta;}
00404 
00405 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00406             void progress( size_t pos, size_t total )
00407             {
00408                 if( this->m_prog )
00409                 {
00410                     this->m_prog->operator()( pos, total );
00411                 }
00412             }
00413             void clear_cancel() { this->m_cancelled = false; }
00414             void assert_not_cancelled()
00415             {
00416                 if( this->is_cancelled() )
00417                 {
00418                     throw ::s11n::s11n_exception("Serializer operation was cancelled.");
00419                 }
00420             }
00421 #endif // s11n_SERIALIZER_ENABLE_INTERACTIVE
00422 
00423                 private:
00424                         std::string m_cookie;
00425                         node_type m_meta;
00426 #if s11n_SERIALIZER_ENABLE_INTERACTIVE
00427             bool m_cancelled;
00428             progress_reporter * m_prog;
00429 #endif
00430                 }; // data_node_serializer<>
00431 
00432         /**
00433            Tries to guess which Serializer can be used to read
00434            is. Returns an instance of that type on success or
00435            0 on error. The caller owns the returned object. It
00436            may propagate exceptions.
00437 
00438            Achtung: the first line of input from the input
00439            stream is consumed by this function (to find the
00440            cookie), which means that if this stream is handed
00441            off to the object which this function returns, it
00442            won't get its own cookie.  The only reliable way
00443            around this [that i know of] is to buffer the whole
00444            input as a string, and i don't wanna do that (it's
00445            really bad for massive data files).
00446 
00447            Special feature:
00448            
00449            If the first line of the stream is
00450            "#s11n::io::serializer CLASSNAME" or
00451            "#!/s11n/io/serialize CLASSNAME" then the CLASSNAME
00452            token is expected to be a Serializer class
00453            name. This function will try to classload that
00454            object.  If successful it will use that type to
00455            deserialize the input stream. If that fails, it
00456            will return 0.  The intention of this feature is to
00457            simplify creation of non-C++ tools which generate
00458            s11n data (e.g., perl scripts), so that they don't
00459            need to know the exact cookies.
00460 
00461            Added in 1.2.1.
00462         */
00463         template <typename NodeType>
00464         data_node_serializer<NodeType> * guess_serializer( std::istream & is )
00465         {
00466             typedef data_node_serializer<NodeType> ST;
00467             ST * ser = 0;
00468                         std::string cookie;
00469                         // CERR << "cookie="<<cookie<<std::endl;
00470             cookie = get_magic_cookie( is );
00471             if( cookie.empty() ) return 0;
00472             std::string opencmd = "#s11n::io::serializer ";
00473             std::string::size_type at = cookie.find( opencmd );
00474             if( std::string::npos == at )
00475             { // try new approach, added in 1.1.0:
00476                 opencmd = "#!/s11n/io/serializer ";
00477                 at = cookie.find( opencmd );
00478             }
00479 
00480             if( 0 == at )
00481             {
00482                 std::string dll = cookie.substr( opencmd.size() );
00483                 ser = ::s11n::cl::classload<ST>( dll );
00484             }
00485             else
00486             {
00487                 ser =  ::s11n::cl::classload<ST>( cookie );
00488             }
00489             return ser;
00490         }
00491 
00492         /**
00493            An overload which assumes infile is a local file.
00494 
00495            Added in 1.2.1.
00496         */
00497         template <typename NodeType>
00498         data_node_serializer<NodeType> * guess_serializer( std::string const & infile )
00499         {
00500             std::auto_ptr<std::istream> is( get_istream( infile.c_str() ) );
00501             return is.get()
00502                 ? guess_serializer<NodeType>( *is )
00503                 : 0;
00504         }
00505 
00506                 /**
00507 
00508                 Tries to load a NodeType object from the given
00509                 node. It uses the cookie from the input stream (the
00510                 first line) and uses
00511                 s11n::cl::classload<SerializerBaseType>() to find a
00512                 matching Serializer.
00513 
00514                 On error 0 is returned or an exception is thrown,
00515                 else a new pointer, which the caller owns.
00516 
00517                 ACHTUNG: Only usable for loading ROOT nodes.
00518 
00519         See guess_serializer( std::istream & is ) for more
00520         information, as that function is used to dispatch the
00521         stream.
00522 
00523                 */
00524                 template <typename NodeType>
00525                 NodeType *
00526                 load_node_classload_serializer( std::istream & is )
00527                 {
00528             try
00529             {
00530                 typedef data_node_serializer<NodeType> ST;
00531                 std::auto_ptr<ST> ser( guess_serializer<NodeType>( is ) );
00532                 return ser.get()
00533                     ? ser->deserialize( is )
00534                     : 0;
00535             }
00536             catch( const s11n_exception & sex )
00537             {
00538                 throw sex;
00539             }
00540             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00541             {
00542                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00543             }
00544             catch( ... )
00545             {
00546                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."),
00547                                 __FILE__, __LINE__ );
00548             }
00549             return 0;
00550                 }
00551 
00552         /**
00553            Overloaded to take a filename. This is handled
00554            separately from the stream overload because some
00555            Serializers must behave differently in the face of
00556            streams. e.g., db-based Serializers typically can't
00557            deal with streams.
00558 
00559            Added in 1.2.1.
00560         */
00561                 template <typename NodeType>
00562                 NodeType *
00563                 load_node_classload_serializer( std::string const & src )
00564                 {
00565                         typedef data_node_serializer<NodeType> ST;
00566             ST * ser = guess_serializer<NodeType>( src );
00567             if( ! ser ) return 0;
00568             try
00569             {
00570                 return ser->deserialize( src );
00571             }
00572             catch( const s11n_exception & sex )
00573             {
00574                 throw sex;
00575             }
00576             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00577             {
00578                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00579             }
00580             catch( ... )
00581             {
00582                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason."),
00583                                 __FILE__, __LINE__ );
00584             }
00585             return 0;
00586                 }
00587 
00588 
00589                 /**
00590                    Returns a node pointer, parsed from the given stream, using
00591                    <code>s11n::io::data_node_serializer<NodeType></code>
00592                    as the base type for looking up a stream handler.
00593 
00594                    ACHTUNG: Only usable for loading ROOT nodes.
00595                 */
00596                 template <typename NodeType>
00597                 NodeType * load_node( std::istream & is )
00598                 {
00599                         return load_node_classload_serializer< NodeType >( is );
00600                 }
00601 
00602                 /**
00603                    Overloaded form of load_node( istream ), provided for
00604                    convenience.
00605 
00606                    If ExternalData is true, input is treated as a file,
00607                    otherwise it is treated as a string containing input
00608                    to parse.
00609 
00610                    ACHTUNG: Only usable for loading ROOT nodes.
00611 
00612            Behaviour change in 1.2.1:
00613 
00614            If (ExternalData) then this call is eventually
00615            passed to ASerializer->deserialize(src). In
00616            previous versions, src was "converted" to a stream
00617            and passed to ASerializer->deserialize(istream),
00618            which does not work for some Serializers. This was
00619            fixed in 1.2.1 to allow the sqlite3 add-on to play
00620            along more transparently with s11nconvert and s11nbrowser.
00621                 */
00622                 template <typename NodeType>
00623                 NodeType * load_node( const std::string & src, bool ExternalData = true )
00624                 {
00625             if( ! ExternalData )
00626             {
00627                 typedef std::auto_ptr<std::istream> AP;
00628                 AP is( ::s11n::io::get_istream( src, ExternalData ) );
00629                 if( ! is.get() ) return 0;
00630                 return load_node<NodeType>( *is );
00631             }
00632             return load_node_classload_serializer<NodeType>( src );
00633                 }
00634 
00635                 /**
00636                    Tries to load a SerializableT from the given stream.
00637                    On success returns a new object, else 0.
00638 
00639                    The caller owns the returned pointer.
00640 
00641                    ACHTUNG: Only usable for loading ROOT nodes.
00642                 */
00643                 template <typename NodeT,typename SerializableT>
00644                 SerializableT * load_serializable( std::istream & src )
00645                 {
00646                         typedef std::auto_ptr<NodeT> AP;
00647                         AP node( load_node<NodeT>( src ) );
00648                         if( ! node.get() )
00649                         {
00650                                 CERR << "load_serializable<>(istream) Could not load a root node from the input.\n";
00651                                 return 0;
00652                         }
00653                         return ::s11n::deserialize<NodeT,SerializableT>( *node );
00654                 }
00655 
00656                 /**
00657                    An overloaded form which takes an input string. If
00658                    ExternalData is true the string is treated as a file
00659                    name, otherwise it is processed as an input stream.
00660 
00661                    ACHTUNG: Only usable for loading ROOT nodes.
00662 
00663            Behaviour chagne in 1.2.1 when (ExternalData):
00664            load_node(string) is used to load the snode tree,
00665            as opposed to load_node(stream). This change was to
00666            allow non-stream-friendly Serializers (e.g.,
00667            DB-based) to integrate more fully into s11n.
00668                 */
00669                 template <typename NodeT,typename SerializableT>
00670                 SerializableT * load_serializable( const std::string & src, bool ExternalData = true )
00671                 {
00672             if( ! ExternalData )
00673             {
00674                 typedef std::auto_ptr<std::istream> AP;
00675                 AP is( ::s11n::io::get_istream( src, ExternalData ) );
00676                 if( ! is.get() )
00677                 {
00678                     // CERR << "load_serializable<>(string) Could not load a root node from the input.\n";
00679                     return 0;
00680                 }
00681                 return load_serializable<NodeT,SerializableT>( *is );
00682             }
00683                         typedef std::auto_ptr<NodeT> AP;
00684                         AP node( load_node<NodeT>( src ) );
00685                         if( ! node.get() )
00686                         {
00687                                 // CERR << "load_serializable<>(string) Could not load a root node from the input.\n";
00688                                 return 0;
00689                         }
00690                         return ::s11n::deserialize<NodeT,SerializableT>( *node );
00691         }
00692 
00693         } // namespace io
00694 
00695 } // namespace s11n
00696 
00697 #endif // s11n_DATA_NODE_IO_H_INCLUDED

Generated on Sun Dec 18 18:30:27 2005 for libs11n-1.2.2 by  doxygen 1.4.4