Main Page | Namespace List | Class Hierarchy | Alphabetical List | Class List | Directories | File List | Namespace Members | Class Members | File Members | Related Pages

data_node_io.hpp

Go to the documentation of this file.
00001 #ifndef s11n_DATA_NODE_IO_H_INCLUDED
00002 #define s11n_DATA_NODE_IO_H_INCLUDED
00003 
00004 ////////////////////////////////////////////////////////////////////////
00005 // data_node_io.hpp
00006 // some i/o interfaces & helpers for s11n
00007 // License: Public Domain
00008 // Author: stephan@s11n.net
00009 ////////////////////////////////////////////////////////////////////////
00010 
00011 
00012 #include <string>
00013 #include <sstream>
00014 #include <list>
00015 #include <map>
00016 #include <deque>
00017 #include <iostream>
00018 #include <memory>// auto_ptr
00019 
00020 #include <cassert>
00021 #include <typeinfo>
00022 
00023 
00024 
00025 // #include <s11n.net/cl/cllite.hpp> // for opening DLLs
00026 
00027 #include <s11n.net/s11n/phoenix.hpp> // phoenix class
00028 
00029 #include <s11n.net/s11n/exception.hpp>
00030 #include <s11n.net/s11n/s11n_debuggering_macros.hpp> // COUT/CERR
00031 #include <s11n.net/s11n/classload.hpp> // classloader()
00032 #include <s11n.net/s11n/serialize.hpp> // unfortunately dep
00033 #include <s11n.net/s11n/traits.hpp> // s11n_traits & node_traits
00034 
00035 #include <s11n.net/s11n/export.hpp> // for exporting symbols to DLL
00036 
00037 ////////////////////////////////////////////////////////////////////////////////
00038 // NO DEPS ON s11n_node.hpp ALLOWED!
00039 ////////////////////////////////////////////////////////////////////////////////
00040 
00041 
00042 namespace s11n {
00043 
00044         namespace io {
00045 
00046         /**
00047            Returns an output stream for the given file
00048            name. Caller owns the returned pointer, which may
00049            be 0.
00050 
00051            The returned stream supports zlib and bz2lib if your
00052            libs11n is built with libs11n_zfstream support, meaning
00053            it can write files compressed with zlib/gzip or bz2lib.
00054         */
00055         std::ostream * get_ostream( const std::string name );
00056 
00057         /**
00058            Returns an input stream for the given file
00059            name. Caller owns the returned pointer, which may
00060            be 0.
00061 
00062            The returned stream supports zlib and bz2lib if
00063            your libs11n is built with libs11n_zfstream
00064            support. That means that if
00065            zfstream::compression_policy() is set, then the
00066            returned stream might be a (de)compressing stream.
00067 
00068            If ExternalData is false then name is assumed to be
00069            a string containing input, and a string-reading stream
00070            is returned.
00071         */
00072         std::istream * get_istream( const std::string name, bool ExternalData = true );
00073 
00074                 /**
00075                    Convenience function for grabbing the first line of a file
00076                    or of an in-memory string.
00077                    If ExternalData == true then src is a file name and the first
00078                    line of that file is returned, else returns up to the first newline of src.
00079                 */
00080                 std::string get_magic_cookie( const std::string & src, bool ExternalData = true );
00081 
00082                 /**
00083                    Convenience function for grabbing the first line of a
00084                    stream (is is taken by non-const reference).
00085                    NOTE(review): presumably the line is consumed from is - confirm.
00086                    Returns the first line of the given stream, or an
00087                    empty string on error.
00088                 */
00089                 std::string get_magic_cookie( std::istream & is );
00090 
00091                 /**
00092                    data_node_serializer provides an interface for
00093                    saving/loading a given abstract data node type
00094                    to/from streams.
00095 
00096                    It is designed for containers which comply with
00097                    s11n's Data Node interface and conventions.
00098 
00099 
00100                    Conventions:
00101 
00102                    Must provide:
00103 
00104                    typedef NodeT node_type
00105 
00106                    Two de/serialize functions, following the
00107                    stream-based interface shown here (filename-based
00108                    variants are optional, but convenient for clients).
00109 
00110                 */
00111                 template <typename NodeT>
00112                 class S11N_EXPORT_API data_node_serializer
00113                 {
00114                 public:
00115                         /**
00116                            The underlying data type used to store
00117                            serialized data.
00118                         */
00119                         typedef NodeT node_type;
00120 
00121                         /** Sets a placeholder cookie and names the metadata node "serializer_metadata". */
00122                         data_node_serializer()
00123                         {
00124                                 this->magic_cookie( "WARNING: magic_cookie() not set!" );
00125                                 // ^^^ subclasses must do this.
00126                                 typedef ::s11n::node_traits<node_type> NTR;
00127                                 NTR::name( this->metadata(), "serializer_metadata" );
00128                                 // this->metadata().name( "serializer_metadata" );
00129                         };
00130                         virtual ~data_node_serializer(){};
00131 
00132 
00133                         /**
00134                            A convenience typedef, mainly for subclasses.
00135                         */
00136                         typedef std::map<std::string,std::string> translation_map;
00137 
00138                         /**
00139                            Returns a map intended for use with
00140                            ::s11n::io::strtool::translate_entities().
00141                            
00142                            The default implementation returns an empty map.
00143                            
00144                            Subclasses should override this to return a translation
00145                            map, if they need one. The default map is empty.
00146 
00147                            Be aware that this may very well be called
00148                            post-main(), so subclasses should take that into
00149                            account and provide post-main()-safe maps!
00150                         */
00151                         virtual const translation_map & entity_translations() const
00152                         {
00153                                 typedef ::s11n::Detail::phoenix<translation_map,data_node_serializer<node_type> > TMap;
00154                                 return TMap::instance();
00155                         }
00156 
00157 
00158 
00159                         /**
00160                            Must be implemented to format node_type to the given ostream.
00161 
00162                            It should return true on success, false on error.
00163 
00164                            The default implementation always returns false.
00165 
00166                            Note that this function does not use
00167                            s11n::serialize() in any way, and is only
00168                            coincidentally related to it.
00169                         */
00170                         virtual bool serialize( const node_type & /*src*/, std::ostream & /*dest*/ )
00171                         {
00172                                 return false;
00173                         }
00174 
00175                        /**
00176                            Overloaded to save src to the given filename.
00177 
00178                            The default implementation treats destfile
00179                            as a file name and passes the call on to
00180                            serialize(node_type,ostream).  The output
00181                            file is compressed if zfstream::compression_policy()
00182                            has been set to enable it.
00183 
00184                            Returns true on success, false on error.
00185 
00186                            This function is virtual so that
00187                            Serializers which do not deal with
00188                            i/ostreams (e.g., those which use a
00189                            database connection) can override it to
00190                            interpret destfile as, e.g., a
00191                            database-related string (e.g., connection,
00192                            db object name, or whatever).
00193 
00194                            Fixed in 1.0.2: returns false when destfile
00195                            is empty.
00196                         */
00197                         virtual bool serialize( const node_type & src, const std::string & destfile )
00198                         {
00199                                 if( destfile.empty() ) return false;
00200                                 // auto_ptr (not a raw delete) so the stream is freed even if serialize() throws:
00201                                 typedef std::auto_ptr<std::ostream> AP;
00202                                 AP os = AP( ::s11n::io::get_ostream( destfile ) );
00203                                 if( ! os.get() ) return false;
00204                                 return this->serialize( src, *os );
00205                         }
00206 
00207                         /**
00208                            Must be implemented to parse a node_type from the given istream.
00209 
00210                            It should return a new node (which the caller owns) on success, 0 on error.
00211 
00212                            The default implementation always returns 0 and does nothing.
00213 
00214                            Note that this function does not use
00215                            s11n::deserialize() in any way, and is only
00216                            coincidentally related to it.
00217 
00218                            Subclasses should try not to have to buffer
00219                            the whole stream before parsing, because
00220                            object trees can be arbitrarily large and a
00221                            buffered copy effectively doubles the
00222                            memory needed to store the tree during the
00223                            deserialization process. Buffering
00224                            behaviour is unspecified by this interface,
00225                            however, and subclasses may pre-buffer the
00226                            whole stream content if they need to.
00227                         */
00228                         virtual node_type * deserialize( std::istream & )
00229                         {
00230                                 return 0;
00231                         }
00232 
00233 
00234                         /**
00235                            Overloaded to load a node from the given filename.
00236 
00237                            It supports zlib/bz2lib decompression for
00238                            files if your s11n lib supports them.
00239 
00240                            This is virtual for the same reason as
00241                            serialize(string).
00242 
00243                         */
00244                         virtual node_type * deserialize( const std::string & src )
00245                         {
00246                                 typedef std::auto_ptr<std::istream> AP;
00247                                 AP is = AP( ::s11n::io::get_istream( src ) );
00248                                 if( ! is.get() ) return 0;
00249                                 return this->deserialize( *is );
00250                         }
00251 
00252 
00253                         /**
00254                            Gets this object's magic cookie.
00255 
00256                            Cookies are registered with
00257                            <code>class_loader< data_node_serializer<NodeType> ></code>
00258                            types to map files to file input parsers.
00259                         */
00260                         std::string magic_cookie() const
00261                         {
00262                                 return this->m_cookie;
00263                         }
00264 
00265                 protected:
00266                         /**
00267                            Sets the magic cookie for this type.
00268                         */
00269                         void magic_cookie( const std::string & c )
00270                         {
00271                                 this->m_cookie = c;
00272                         }
00273 
00274                         /**
00275                            metadata is an experimental feature
00276                            allowing serializers to store arbitrary
00277                            serializer-specific information in their
00278                            data streams.
00279                          */
00280                         node_type & metadata()
00281                         { return this->m_meta; }
00282                         /**
00283                            A const overload of metadata().
00284                          */
00285                         const node_type & metadata() const
00286                         { return this->m_meta;}
00287                 private:
00288                         std::string m_cookie;
00289                         node_type m_meta;
00290                 }; // data_node_serializer<>
00291 
00292                 /**
00293 
00294                 Tries to load a NodeType object from the given
00295                 stream. It uses the cookie from the input stream (the
00296                 first line) and uses
00297                 s11n::cl::classload<SerializerBaseType>() to find a
00298                 matching Serializer.
00299 
00300                 On error 0 is returned or an exception is thrown,
00301                 else a new pointer, which the caller owns.
00302 
00303                 Achtung: the first line of input from the input stream
00304                 is consumed by this function (to find the cookie), and
00305                 the cookie is not passed on to the handler! The only
00306                 reliable way around this [that i know of] is to buffer
00307                 the whole input as a string, and i don't wanna do that
00308                 (it's really bad for massive data files).
00309 
00310                 ACHTUNG: Only usable for loading ROOT nodes.
00311 
00312                 Special feature:
00313 
00314                 If the first line of the stream is
00315                 "#s11n::io::serializer CLASSNAME" then the CLASSNAME
00316                 token is expected to be a Serializer class name. This
00317                 function will try to classload that object.  If
00318                 successful it will use that type to deserialize the
00319                 input stream. If that fails, it will return 0.
00320         The intention of this feature is to simplify creation
00321         of non-C++ tools which generate s11n data (e.g., perl
00322         scripts), so that they don't need to know the exact
00323         cookies.
00324                 */
00325                 template <typename NodeType>
00326                 NodeType *
00327                 load_node_classload_serializer( std::istream & is )
00328                 {
00329                         typedef data_node_serializer<NodeType> ST;
00330                         std::string cookie; // = get_magic_cookie( is );
00331                         // CERR << "cookie="<<cookie<<std::endl;
00332                         if( ! std::getline( is, cookie ) )
00333                         {
00334                                 CERR << "Odd: got a null cookie from the istream.\n";
00335                                 return 0; // happens post-main() on valid streams sometimes!?!?!
00336                         }
00337 
00338                         // CERR << "Dispatching to node loader for cookie ["<<cookie<<"]\n";
00339             try
00340             {
00341                 typedef std::auto_ptr<ST> AP;
00342                 AP ser;
00343                 std::string opencmd = "#s11n::io::serializer ";
00344                 std::string::size_type at = cookie.find( opencmd );
00345                 if( std::string::npos == at )
00346                 { // try new approach, added in 1.1.0:
00347                     opencmd = "#!/s11n/io/serializer ";
00348                     at = cookie.find( opencmd );
00349                 }
00350 
00351                 if( 0 == at )
00352                 {
00353                     std::string dll = cookie.substr( opencmd.size() );
00354                     //CERR << "Trying to load Serializer from cookie: " << dll << "\n";
00355                     ser = AP( ::s11n::cl::classload<ST>( dll ) );
00356                 }
00357                 else
00358                 {
00359                     ser = AP( ::s11n::cl::classload<ST>( cookie ) );
00360                 }
00361             
00362                 if( ! (ser.get()) )
00363                 {
00364                     CERR << "Did not find serializer for cookie ["<<cookie<<"]."<<std::endl;
00365                     return 0;
00366                 }
00367                 return ser->deserialize( is );
00368             }
00369             catch( const s11n_exception & )
00370             {
00371                 throw; // rethrow the original object: "throw sex" would copy it and slice derived types
00372             }
00373             catch( const std::exception & ex ) // todo: consider allowing ser->deserialize() to pass through exceptions
00374             {
00375                 throw ::s11n::io_exception( ex.what(), __FILE__, __LINE__ );
00376             }
00377             catch( ... )
00378             {
00379                 throw ::s11n::io_exception( std::string("Stream-level deserialization failed for unknown reason. Cookie=")+cookie,
00380                                 __FILE__, __LINE__ );
00381             }
00382             return 0; // not reached: every path above returns or throws
00383                 }
00384 
00385                 /**
00386                    Parses a root node from the given stream. The stream
00387                    handler is looked up with
00388                    <code>s11n::io::data_node_serializer<NodeType></code>
00389                    as the Serializer base type. ACHTUNG: ROOT nodes only.
00390                 */
00391                 template <typename NodeType>
00392                 NodeType * load_node( std::istream & is )
00393                 {
00394                         NodeType * const root = load_node_classload_serializer< NodeType >( is );
00395                         return root;
00396                 }
00397 
00398                 /**
00399                    Convenience overload of load_node( istream ) which
00400                    opens the input stream itself.
00401 
00402                    When ExternalData is true, src is treated as a file,
00403                    otherwise src is treated as a string holding the
00404                    input to parse.
00405 
00406                    ACHTUNG: Only usable for loading ROOT nodes.
00407 
00408                    Maintenance note: ExternalData==false may be extremely
00409                    inefficient, as src may get copied one additional
00410                    time.
00411                 */
00412                 template <typename NodeType>
00413                 NodeType * load_node( const std::string & src, bool ExternalData = true )
00414                 {
00415                         // The auto_ptr owns the stream and frees it on every exit path.
00416                         std::auto_ptr<std::istream> input( ::s11n::io::get_istream( src, ExternalData ) );
00417                         if( 0 == input.get() ) return 0;
00418                         return load_node<NodeType>( *input );
00419                 }
00420 
00421                 /**
00422                    Loads a root node from src via load_node<NodeT>() and
00423                    passes it to ::s11n::deserialize<NodeT,SerializableT>().
00424                    Returns 0 if no root node could be loaded, else the
00425                    result of that call. The caller owns the returned pointer.
00426 
00427                    ACHTUNG: Only usable for loading ROOT nodes.
00428                 */
00429                 template <typename NodeT,typename SerializableT>
00430                 SerializableT * load_serializable( std::istream & src )
00431                 {
00432                         // Owns the intermediate node tree until this function exits.
00433                         std::auto_ptr<NodeT> root( load_node<NodeT>( src ) );
00434                         if( 0 == root.get() )
00435                         {
00436                                 CERR << "load_serializable<>(istream) Could not load a root node from the input.\n";
00437                                 return 0;
00438                         }
00439                         return ::s11n::deserialize<NodeT,SerializableT>( *root );
00440                 }
00441 
00442                 /**
00443                    Overload taking the input as a string: when
00444                    ExternalData is true, src names a file to read,
00445                    otherwise src itself holds the data to parse.
00446 
00447                    ACHTUNG: Only usable for loading ROOT nodes.
00448                 */
00449                 template <typename NodeT,typename SerializableT>
00450                 SerializableT * load_serializable( const std::string & src, bool ExternalData = true )
00451                 {
00452                         // NOTE(review): a null stream is reported below as a missing root node.
00453                         std::auto_ptr<std::istream> input( ::s11n::io::get_istream( src, ExternalData ) );
00454                         if( 0 == input.get() )
00455                         {
00456                                 CERR << "load_serializable<>(string) Could not load a root node from the input.\n";
00457                                 return 0;
00458                         }
00459                         return load_serializable<NodeT,SerializableT>( *input );
00460                 }
00461         } // namespace io
00462 
00463 } // namespace s11n
00464 
00465 #endif // s11n_DATA_NODE_IO_H_INCLUDED

Generated on Thu Sep 29 20:01:14 2005 for libs11n-1.1.3-dev by  doxygen 1.4.1