WWW 3 RAPTOR Library WWW Retrieval of URI content from the web. Synopsis typedef raptor_www; void raptor_www_init (void); void raptor_www_finish (void); void (*raptor_www_write_bytes_handler) (raptor_www *www, void *userdata, const void *ptr, size_t size, size_t nmemb); void (*raptor_www_content_type_handler) (raptor_www *www, void *userdata, const char *content_type); void raptor_www_no_www_library_init_finish (void); raptor_www* raptor_www_new (void); raptor_www* raptor_www_new_with_connection (void *connection); void raptor_www_free (raptor_www *www); void raptor_www_set_user_agent (raptor_www *www, const char *user_agent); void raptor_www_set_proxy (raptor_www *www, const char *proxy); void raptor_www_set_http_accept (raptor_www *www, const char *value); int raptor_www_set_http_cache_control (raptor_www *www, const char *cache_control); void raptor_www_set_write_bytes_handler (raptor_www *www, raptor_www_write_bytes_handler handler, void *user_data); void raptor_www_set_connection_timeout (raptor_www *www, int timeout); void raptor_www_set_content_type_handler (raptor_www *www, raptor_www_content_type_handler handler, void *user_data); void raptor_www_set_error_handler (raptor_www *www, raptor_message_handler error_handler, void *error_data); int (*raptor_uri_filter_func) (void *user_data, raptor_uri *uri); void raptor_www_set_uri_filter (raptor_www *www, raptor_uri_filter_func filter, void *user_data); void (*raptor_www_final_uri_handler) (raptor_www *www, void *userdata, raptor_uri *final_uri); raptor_uri* raptor_www_get_final_uri (raptor_www *www); void raptor_www_set_final_uri_handler (raptor_www *www, raptor_www_final_uri_handler handler, void *user_data); int raptor_www_fetch (raptor_www *www, raptor_uri *uri); int raptor_www_fetch_to_string (raptor_www *www, raptor_uri *uri, void **string_p, size_t *length_p, void* (*malloc_handler) (size_t size)); void* raptor_www_get_connection (raptor_www *www); void raptor_www_abort (raptor_www *www, const char 
*reason); Description Provides a wrapper to the resolution of URIs to give content using an underlying WWW-retrieval library. The content is delivered by callbacks and includes returning content type for handling content-negotiation by the caller as well as chunks of byte content. Details raptor_www raptor_wwwraptor_www* raptor_www; Raptor WWW class raptor_www_init () raptor_www_initvoid raptor_www_init (void); Initialise the WWW class. Must be called before creating any raptor_www object. raptor_www_finish () raptor_www_finishvoid raptor_www_finish (void); Terminate the WWW class. Must be called to clean any resources used by the WWW implementation. raptor_www_write_bytes_handler () raptor_www_write_bytes_handlervoid (*raptor_www_write_bytes_handler) (raptor_www *www, void *userdata, const void *ptr, size_t size, size_t nmemb); Receiving bytes of data from WWW retrieval handler. Set by raptor_www_set_write_bytes_handler(). www : WWW object userdata : user data ptr : data pointer size : size of individual item nmemb : number of items raptor_www_content_type_handler () raptor_www_content_type_handlervoid (*raptor_www_content_type_handler) (raptor_www *www, void *userdata, const char *content_type); Receiving Content-Type: header from WWW retrieval handler. Set by raptor_www_set_content_type_handler(). www : WWW object userdata : user data content_type : content type seen raptor_www_no_www_library_init_finish () raptor_www_no_www_library_init_finishvoid raptor_www_no_www_library_init_finish (void); Do not initialise or finish the lower level WWW library. If this is called then the raptor_www library will neither initialise nor terminate the lower level WWW library. Usually in raptor_init curl_global_init (for libcurl) is called and in raptor_finish curl_global_cleanup is called. This allows the application finer control over these libraries such as setting other global options or potentially calling and terminating raptor several times. 
It does mean that applications which use this call must do their own extra work in order to allocate and free all resources to the system. This function must be called before raptor_init. raptor_www_new () raptor_www_newraptor_www* raptor_www_new (void); Constructor - create a new raptor_www object. Returns : a new raptor_www or NULL on failure. raptor_www_new_with_connection () raptor_www_new_with_connectionraptor_www* raptor_www_new_with_connection (void *connection); Constructor - create a new raptor_www object over an existing WWW connection. At present this only works with a libcurl CURL handle object when raptor is compiled with libcurl support. Otherwise the connection is ignored. This allows such things as setting up special flags on the curl handle before passing into the constructor. connection : external WWW connection object. Returns : a new raptor_www object or NULL on failure. raptor_www_free () raptor_www_freevoid raptor_www_free (raptor_www *www); Destructor - destroy a raptor_www object. www : WWW object. raptor_www_set_user_agent () raptor_www_set_user_agentvoid raptor_www_set_user_agent (raptor_www *www, const char *user_agent); Set the user agent value, for HTTP requests typically. www : WWW object user_agent : User-Agent string raptor_www_set_proxy () raptor_www_set_proxyvoid raptor_www_set_proxy (raptor_www *www, const char *proxy); Set the proxy for the WWW object. The proxy is usually a string of the form http://server.domain:port. www : WWW object proxy : proxy string. raptor_www_set_http_accept () raptor_www_set_http_acceptvoid raptor_www_set_http_accept (raptor_www *www, const char *value); Set HTTP Accept header. www : raptor_www class value : Accept: header value or NULL to have an empty one. 
raptor_www_set_http_cache_control () raptor_www_set_http_cache_controlint raptor_www_set_http_cache_control (raptor_www *www, const char *cache_control); Set HTTP Cache-Control: header (default none). The cache_control value can be a string to set it, "" to send a blank header or NULL to not set the header at all. www : WWW object cache_control : Cache-Control header value (or NULL to disable) Returns : non-0 on failure raptor_www_set_write_bytes_handler () raptor_www_set_write_bytes_handlervoid raptor_www_set_write_bytes_handler (raptor_www *www, raptor_www_write_bytes_handler handler, void *user_data); Set the handler to receive bytes written by the raptor_www implementation. www : WWW object handler : bytes handler function user_data : bytes handler data raptor_www_set_connection_timeout () raptor_www_set_connection_timeoutvoid raptor_www_set_connection_timeout (raptor_www *www, int timeout); Set WWW connection timeout. www : WWW object timeout : Timeout in seconds raptor_www_set_content_type_handler () raptor_www_set_content_type_handlervoid raptor_www_set_content_type_handler (raptor_www *www, raptor_www_content_type_handler handler, void *user_data); Set the handler to receive the HTTP Content-Type header value. This is called if or when the value is discovered during retrieval by the raptor_www implementation. Not all implementations provide access to this. www : WWW object handler : content type handler function user_data : content type handler data raptor_www_set_error_handler () raptor_www_set_error_handlervoid raptor_www_set_error_handler (raptor_www *www, raptor_message_handler error_handler, void *error_data); Set the error handler routine for the raptor_www class. This takes the same arguments as the raptor_parser_set_error() and raptor_parser_set_warning_handler() methods. 
www : WWW object error_handler : error handler function error_data : error handler data raptor_uri_filter_func () raptor_uri_filter_funcint (*raptor_uri_filter_func) (void *user_data, raptor_uri *uri); Callback function for raptor_www_set_uri_filter(). user_data : user data uri : raptor_uri URI to check Returns : non-0 to filter the URI raptor_www_set_uri_filter () raptor_www_set_uri_filtervoid raptor_www_set_uri_filter (raptor_www *www, raptor_uri_filter_func filter, void *user_data); Set URI filter function for WWW retrieval. www : WWW object filter : URI filter function user_data : User data to pass to filter function raptor_www_final_uri_handler () raptor_www_final_uri_handlervoid (*raptor_www_final_uri_handler) (raptor_www *www, void *userdata, raptor_uri *final_uri); Receiving the final resolved URI from a WWW retrieval. Set by raptor_www_set_final_uri_handler(). www : WWW object userdata : user data final_uri : final URI seen raptor_www_get_final_uri () raptor_www_get_final_uriraptor_uri* raptor_www_get_final_uri (raptor_www *www); Get the WWW final resolved URI. This returns the URI used after any protocol redirection. www : raptor_www object Returns : a new URI or NULL if not known. raptor_www_set_final_uri_handler () raptor_www_set_final_uri_handlervoid raptor_www_set_final_uri_handler (raptor_www *www, raptor_www_final_uri_handler handler, void *user_data); Set the handler to receive the final resolved URI. This is called if or when the value is discovered during retrieval by the raptor_www implementation. Not all implementations provide access to this. www : WWW object handler : final URI handler function user_data : final URI handler data raptor_www_fetch () raptor_www_fetchint raptor_www_fetch (raptor_www *www, raptor_uri *uri); Start a WWW content retrieval for the given URI, returning data via the write_bytes handler. www : WWW object uri : URI to read from Returns : non-0 on failure. 
raptor_www_fetch_to_string () raptor_www_fetch_to_stringint raptor_www_fetch_to_string (raptor_www *www, raptor_uri *uri, void **string_p, size_t *length_p, void* (*malloc_handler) (size_t size)); Start a WWW content retrieval for the given URI, returning the data in a new string. If malloc_handler is NULL, raptor will allocate it using its own memory allocator. *string_p is set to NULL on failure (and *length_p to 0 if length_p is not NULL). www : raptor_www object uri : raptor_uri to retrieve string_p : pointer to location to hold string length_p : pointer to location to hold length of string (or NULL) malloc_handler : pointer to malloc to use to make string (or NULL) Returns : non-0 on failure raptor_www_get_connection () raptor_www_get_connectionvoid* raptor_www_get_connection (raptor_www *www); Get WWW library connection object. Return the internal WWW connection handle. For libcurl, this returns the CURL handle and for libxml the context. Otherwise it returns NULL. www : raptor_www object Returns : connection pointer raptor_www_abort () raptor_www_abortvoid raptor_www_abort (raptor_www *www, const char *reason); Abort an ongoing raptor WWW operation and pass back a reason. This is typically used within one of the raptor WWW handlers when retrieval need no longer continue due to another processing issue or error. www : WWW object reason : abort reason message