1
0
mirror of https://github.com/cookiengineer/audacity synced 2025-07-31 07:59:27 +02:00
audacity/lib-src/redland/raptor/src/raptor_rdfxml.c
2010-01-24 09:19:39 +00:00

3193 lines
123 KiB
C

/* -*- Mode: c; c-basic-offset: 2 -*-
*
* raptor_rdfxml.c - Raptor RDF/XML Parser
*
* Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
* Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
*
* This package is Free Software and part of Redland http://librdf.org/
*
* It is licensed under the following three licenses as alternatives:
* 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
* 2. GNU General Public License (GPL) V2 or any newer version
* 3. Apache License, V2.0 or any newer version
*
* You may not use this file except in compliance with at least one of
* the above three licenses.
*
* See LICENSE.html or LICENSE.txt at the top of this package for the
* complete terms and further detail along with the license texts for
* the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
*
*
*/
#ifdef HAVE_CONFIG_H
#include <raptor_config.h>
#endif
#ifdef WIN32
#include <win32_raptor_config.h>
#endif
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#ifdef HAVE_ERRNO_H
#include <errno.h>
#endif
#ifdef HAVE_STDLIB_H
#include <stdlib.h>
#endif
/* Raptor includes */
#include "raptor.h"
#include "raptor_internal.h"
/* Define these for far too much output */
#undef RAPTOR_DEBUG_VERBOSE
#undef RAPTOR_DEBUG_CDATA
/* Raptor structures */
/*
 * States of the RDF/XML grammar that an element on the stack can be in.
 *
 * The order of the enumerators is significant: raptor_state_names below
 * is indexed by these values and must list one name per state, in the
 * same order.
 */
typedef enum {
  /* Catch uninitialised state */
  RAPTOR_STATE_INVALID = 0,

  /* Skipping current tree of elements - used to recover after finding
   * illegal content, when parsing permissively.
   */
  RAPTOR_STATE_SKIPPING,

  /* Not in RDF grammar yet - searching for a start element.
   *
   * This can be <rdf:RDF> (goto NODE_ELEMENT_LIST) but since it is optional,
   * the start element can also be one of
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs
   *
   * If RDF content is assumed, go straight to OBJ
   */
  RAPTOR_STATE_UNKNOWN,

  /* A list of node elements
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
   */
  RAPTOR_STATE_NODE_ELEMENT_LIST,

  /* Found an <rdf:Description> */
  RAPTOR_STATE_DESCRIPTION,

  /* Found a property element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
   */
  RAPTOR_STATE_PROPERTYELT,

  /* A property element that is an ordinal - rdf:li, rdf:_n
   */
  RAPTOR_STATE_MEMBER_PROPERTYELT,

  /* Found a node element
   *   http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
   */
  RAPTOR_STATE_NODE_ELEMENT,

  /* A property element with rdf:parseType="Literal"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_LITERAL,

  /* A property element with rdf:parseType="Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
   */
  RAPTOR_STATE_PARSETYPE_RESOURCE,

  /* A property element with rdf:parseType="Collection"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
   *
   * (This also handles daml:Collection)
   */
  RAPTOR_STATE_PARSETYPE_COLLECTION,

  /* A property element with a rdf:parseType attribute and a value
   * not "Literal" or "Resource"
   *   http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt
   */
  RAPTOR_STATE_PARSETYPE_OTHER,

  RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER
} raptor_state;


/*
 * Printable names of the raptor_state values, indexed by the enum value.
 *
 * There is exactly one entry per enumerator, in enum order.  (An earlier
 * revision carried a stray extra "propertyElt" entry before "Description",
 * which shifted the name of every state from DESCRIPTION onwards by one.)
 */
static const char * const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+1]={
  "INVALID",             /* RAPTOR_STATE_INVALID */
  "SKIPPING",            /* RAPTOR_STATE_SKIPPING */
  "UNKNOWN",             /* RAPTOR_STATE_UNKNOWN */
  "nodeElementList",     /* RAPTOR_STATE_NODE_ELEMENT_LIST */
  "Description",         /* RAPTOR_STATE_DESCRIPTION */
  "propertyElt",         /* RAPTOR_STATE_PROPERTYELT */
  "memberPropertyElt",   /* RAPTOR_STATE_MEMBER_PROPERTYELT */
  "nodeElement",         /* RAPTOR_STATE_NODE_ELEMENT */
  "parseTypeLiteral",    /* RAPTOR_STATE_PARSETYPE_LITERAL */
  "parseTypeResource",   /* RAPTOR_STATE_PARSETYPE_RESOURCE */
  "parseTypeCollection", /* RAPTOR_STATE_PARSETYPE_COLLECTION */
  "parseTypeOther"       /* RAPTOR_STATE_PARSETYPE_OTHER */
};


/*
 * raptor_rdfxml_state_as_string - get the printable name of a grammar state
 * @state: state value
 *
 * Any value outside RAPTOR_STATE_SKIPPING .. RAPTOR_STATE_PARSETYPE_LAST
 * is reported as "INVALID".
 *
 * Return value: pointer to a static string, never NULL
 */
static const char * raptor_rdfxml_state_as_string(raptor_state state)
{
  if((int)state < 1 || (int)state > (int)RAPTOR_STATE_PARSETYPE_LAST)
    state=RAPTOR_STATE_INVALID;
  return raptor_state_names[(int)state];
}
/*
* RDF/XML syntax terms, properties and classes.
* Must match names in rdf_syntax_terms_info below.
*/
typedef enum {
  /* syntax-only terms */
  RDF_ATTR_RDF = 0,          /*  0: rdf:RDF */
  RDF_ATTR_Description,      /*  1: rdf:Description */
  RDF_ATTR_li,               /*  2: rdf:li */
  RDF_ATTR_about,            /*  3: value of rdf:about attribute */
  RDF_ATTR_aboutEach,        /*  4: value of rdf:aboutEach attribute */
  RDF_ATTR_aboutEachPrefix,  /*  5: value of rdf:aboutEachPrefix attribute */
  RDF_ATTR_ID,               /*  6: value of rdf:ID attribute */
  RDF_ATTR_bagID,            /*  7: value of rdf:bagID attribute */
  RDF_ATTR_resource,         /*  8: value of rdf:resource attribute */
  RDF_ATTR_parseType,        /*  9: value of rdf:parseType attribute */
  RDF_ATTR_nodeID,           /* 10: value of rdf:nodeID attribute */
  RDF_ATTR_datatype,         /* 11: value of rdf:datatype attribute */

  /* rdf:Property-s */
  RDF_ATTR_type,             /* 12: rdf:type      -- a property in RDF model */
  RDF_ATTR_value,            /* 13: rdf:value     -- a property in RDF model */
  RDF_ATTR_subject,          /* 14: rdf:subject   -- a property in RDF model */
  RDF_ATTR_predicate,        /* 15: rdf:predicate -- a property in RDF model */
  RDF_ATTR_object,           /* 16: rdf:object    -- a property in RDF model */
  RDF_ATTR_first,            /* 17: rdf:first     -- a property in RDF model */
  RDF_ATTR_rest,             /* 18: rdf:rest      -- a property in RDF model */

  /* rdfs:Class-s */
  RDF_ATTR_Seq,              /* 19: rdf:Seq        -- a class in RDF model */
  RDF_ATTR_Bag,              /* 20: rdf:Bag        -- a class in RDF model */
  RDF_ATTR_Alt,              /* 21: rdf:Alt        -- a class in RDF model */
  RDF_ATTR_Statement,        /* 22: rdf:Statement  -- a class in RDF model */
  RDF_ATTR_Property,         /* 23: rdf:Property   -- a class in RDF model */
  RDF_ATTR_List,             /* 24: rdf:List       -- a class in RDF model */
  RDF_ATTR_XMLLiteral,       /* 25: rdf:XMLLiteral -- a class in RDF graph */

  /* rdfs:Resource-s */
  RDF_ATTR_nil,              /* 26: rdf:nil -- a resource in RDF graph */

  /* index of the final term; arrays of terms have RDF_ATTR_LAST+1 entries */
  RDF_ATTR_LAST = RDF_ATTR_nil
} rdf_attr;
/*
* http://www.w3.org/TR/rdf-syntax-grammar/#section-grammar-summary
*
* coreSyntaxTerms := rdf:RDF | rdf:ID | rdf:about | rdf:bagID |
*                    rdf:parseType | rdf:resource | rdf:nodeID | rdf:datatype
* syntaxTerms := coreSyntaxTerms | rdf:Description | rdf:li
* oldTerms := rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID
*
* nodeElementURIs := anyURI - ( coreSyntaxTerms | rdf:li | oldTerms )
* propertyElementURIs := anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms )
* propertyAttributeURIs := anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms )
*
* So, forbidden terms in the RDF namespace are:
* nodeElements
* RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
* li | aboutEach | aboutEachPrefix | bagID
*
* propertyElements
* RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
* Description | aboutEach | aboutEachPrefix | bagID
*
* propertyAttributes
* RDF | ID | about | bagID | parseType | resource | nodeID | datatype |
* Description | li | aboutEach | aboutEachPrefix | bagID
*
* Information about rdf attributes:
* raptor_identifier_type type
* Set when the attribute is a property rather than just syntax
* NOTE: raptor_rdfxml_process_property_attributes() expects only
* RAPTOR_IDENTIFIER_TYPE_NONE,
* RAPTOR_IDENTIFIER_TYPE_LITERAL or RAPTOR_IDENTIFIER_TYPE_RESOURCE
* allowed_unprefixed_on_attribute
* If allowed for legacy reasons to be unprefixed as an attribute.
*
*/
/*
 * Table of the known terms in the RDF namespace.
 *
 * The first RDF_ATTR_LAST+1 rows are looked up by index using rdf_attr
 * values (the start element handler stores the value of the attribute
 * matching row j in element->rdf_attr[j]), so the rows must stay in the
 * same order as the rdf_attr enum.  The final row has a NULL name and
 * terminates the name searches done by the raptor_rdfxml_forbidden_*_name()
 * functions.
 */
static const struct {
const char * const name; /* term name */
/* non-0 if the term may not be used as a node element name */
int forbidden_as_nodeElement;
/* non-0 if the term may not be used as a property element name */
int forbidden_as_propertyElement;
/* non-0 if the term may not be used as a property attribute name */
int forbidden_as_propertyAttribute;
const raptor_identifier_type type; /* statement value */
/* non-0 if no deprecation warning is given when the term is used as an
 * attribute without the RDF namespace
 */
int allowed_unprefixed_on_attribute;
} rdf_syntax_terms_info[]={
/* syntax only */
{ "RDF", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
{ "Description", 0, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
{ "li", 1, 0, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
{ "about", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
{ "aboutEach", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
{ "aboutEachPrefix", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
{ "ID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
{ "bagID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
{ "resource", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
{ "parseType", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 1 },
{ "nodeID", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
{ "datatype", 1, 1, 1, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 },
/* rdf:Property-s */
{ "type", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_RESOURCE, 1 },
{ "value", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "subject", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "predicate", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "object", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "first", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "rest", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
/* rdfs:Class-s */
{ "Seq", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "Bag", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "Alt", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "Statement", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "Property", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "List", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
{ "XMLLiteral", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
/* rdfs:Resource-s */
{ "nil", 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_LITERAL , 0 },
/* end-of-table marker: NULL name */
{ NULL , 0, 0, 0, RAPTOR_IDENTIFIER_TYPE_UNKNOWN , 0 }
};
/*
 * raptor_rdfxml_forbidden_nodeElement_name - check a term as a node element
 * @name: local name of a term
 *
 * Return value: 0 if @name starts with '_', otherwise the
 * forbidden_as_nodeElement flag of the matching rdf_syntax_terms_info
 * row, or -1 if @name is not in that table.
 */
static int
raptor_rdfxml_forbidden_nodeElement_name(const char *name)
{
  int idx=0;

  /* names beginning with '_' are always allowed */
  if(name[0] == '_')
    return 0;

  /* the table is terminated by a row with a NULL name */
  while(rdf_syntax_terms_info[idx].name) {
    if(strcmp(rdf_syntax_terms_info[idx].name, name) == 0)
      return rdf_syntax_terms_info[idx].forbidden_as_nodeElement;
    idx++;
  }

  /* not a known term */
  return -1;
}
/*
 * raptor_rdfxml_forbidden_propertyElement_name - check a term as a property element
 * @name: local name of a term
 *
 * Return value: 0 if @name starts with '_', otherwise the
 * forbidden_as_propertyElement flag of the matching rdf_syntax_terms_info
 * row, or -1 if @name is not in that table.
 */
static int
raptor_rdfxml_forbidden_propertyElement_name(const char *name)
{
  int idx=0;

  /* names beginning with '_' are always allowed */
  if(name[0] == '_')
    return 0;

  /* the table is terminated by a row with a NULL name */
  while(rdf_syntax_terms_info[idx].name) {
    if(strcmp(rdf_syntax_terms_info[idx].name, (const char*)name) == 0)
      return rdf_syntax_terms_info[idx].forbidden_as_propertyElement;
    idx++;
  }

  /* not a known term */
  return -1;
}
/*
 * raptor_rdfxml_forbidden_propertyAttribute_name - check a term as a property attribute
 * @name: local name of a term
 *
 * Return value: 0 if @name starts with '_', otherwise the
 * forbidden_as_propertyAttribute flag of the matching
 * rdf_syntax_terms_info row, or -1 if @name is not in that table.
 */
static int
raptor_rdfxml_forbidden_propertyAttribute_name(const char *name)
{
  int idx=0;

  /* names beginning with '_' are always allowed */
  if(name[0] == '_')
    return 0;

  /* the table is terminated by a row with a NULL name */
  while(rdf_syntax_terms_info[idx].name) {
    if(strcmp(rdf_syntax_terms_info[idx].name, (const char*)name) == 0)
      return rdf_syntax_terms_info[idx].forbidden_as_propertyAttribute;
    idx++;
  }

  /* not a known term */
  return -1;
}
/*
 * How the content inside an XML element is handled.
 *
 * The enumerators (other than the trailing _LAST dummy) index
 * rdf_content_type_info below, so the two must stay in the same order.
 */
typedef enum {
  /* undetermined yet - whitespace is stored */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN,

  /* literal content - no elements, cdata allowed, whitespace significant
   * <propElement> blah </propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL,

  /* parseType literal content (WF XML) - all content preserved
   * <propElement rdf:parseType="Literal"><em>blah</em></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL,

  /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * only used for <rdf:RDF> or implicit <rdf:RDF>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES,

  /* properties - 0+ elements expected, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES,

  /* property content - all content preserved
   * any content type changes when first non-whitespace found
   * <propElement>...
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT,

  /* resource URI given - no element, no cdata, whitespace ignored,
   * any non-whitespace cdata is error
   * <propElement rdf:resource="uri"/>
   * <propElement rdf:resource="uri"></propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE,

  /* skipping content - all content is preserved
   * Used when skipping content for unknown parseType-s,
   * error recovery, some other reason
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED,

  /* parseType Collection - all content preserved
   * Parsing of this determined by RDF/XML (Revised) closed collection rules
   * <propElement rdf:parseType="Collection">...</propElement>
   */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION,

  /* Like above but handles "daml:collection" */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION,

  /* dummy for use in strings below; also the number of real content types */
  RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST
} raptor_rdfxml_element_content_type;


/*
 * Per content type properties, indexed by raptor_rdfxml_element_content_type.
 * Valid indexes are 0 .. RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST-1.
 */
static const struct {
  /* printable name of the content type */
  const char * const name;
  const int whitespace_significant;
  /* non-blank cdata */
  const int cdata_allowed;
  /* XML element content */
  const int element_allowed;
  /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) */
  const int rdf_processing;
} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={
  {"Unknown",         1, 1, 1, 0 },
  {"Literal",         1, 1, 0, 0 },
  {"XML Literal",     1, 1, 1, 0 },
  {"Nodes",           0, 0, 1, 1 },
  {"Properties",      0, 1, 1, 1 },
  {"Property Content",1, 1, 1, 1 },
  {"Resource",        0, 0, 0, 0 },
  {"Preserved",       1, 1, 1, 0 },
  {"Collection",      1, 1, 1, 1 },
  {"DAML Collection", 1, 1, 1, 1 },
};


/*
 * raptor_rdfxml_element_content_type_as_string - get the printable name of a content type
 * @type: content type value
 *
 * RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST is only a count / dummy value
 * and has no row in rdf_content_type_info, so it - like any other value
 * outside the table - is reported as "INVALID" rather than being used
 * as an index.
 *
 * Return value: pointer to a static string, never NULL
 */
static const char *
raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type)
{
  if((int)type < 0 || (int)type >= (int)RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST)
    return "INVALID";
  return rdf_content_type_info[type].name;
}
/*
* Raptor Element/attributes on stack
*/
/* One entry of the parser's element stack.  An entry is allocated
 * zero-filled and pushed by the start element handler, and popped and
 * released with raptor_free_rdfxml_element() by the end element handler.
 */
struct raptor_rdfxml_element_s {
/* the XML element this entry was created for */
raptor_xml_element *xml_element;
/* NULL at bottom of stack */
struct raptor_rdfxml_element_s *parent;
/* attributes declared in M&S
 *
 * Indexed by rdf_attr values.  Non-NULL entries are released with
 * RAPTOR_FREE() in raptor_free_rdfxml_element().
 */
const unsigned char * rdf_attr[RDF_ATTR_LAST+1];
/* how many of above seen */
int rdf_attr_count;
/* state that this production matches */
raptor_state state;
/* how to handle the content inside this XML element */
raptor_rdfxml_element_content_type content_type;
/* starting state for children of this element */
raptor_state child_state;
/* starting content type for children of this element */
raptor_rdfxml_element_content_type child_content_type;
/* STATIC Reified statement identifier */
raptor_identifier reified;
/* STATIC Bag identifier */
raptor_identifier bag;
int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */
/* STATIC Subject identifier (URI/anon ID), type, source
*
* When the XML element represents a node, this is the identifier
*/
raptor_identifier subject;
/* STATIC Predicate URI, source is either
* RAPTOR_URI_SOURCE_ELEMENT or RAPTOR_URI_SOURCE_ATTRIBUTE
*
* When the XML element represents a node or predicate,
* this is the identifier of the predicate
*/
raptor_identifier predicate;
/* STATIC Object identifier (URI/anon ID), type, source
*
* When this XML element generates a statement that needs an object,
* possibly from a child element, this is the identifier of the object
*/
raptor_identifier object;
/* URI of datatype of literal
 *
 * Released with raptor_free_uri() in raptor_free_rdfxml_element()
 * when not NULL.
 */
raptor_uri *object_literal_datatype;
/* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */
int last_ordinal;
/* If this element's parseType is a Collection
* this identifies the anon node of current tail of the collection(list).
*
* Released with RAPTOR_FREE() in raptor_free_rdfxml_element() when
* not NULL.
*/
const unsigned char *tail_id;
/* RDF/XML specific checks */
/* all cdata so far is whitespace */
unsigned int content_cdata_all_whitespace;
};
typedef struct raptor_rdfxml_element_s raptor_rdfxml_element;
/* Number of entries in the concepts[] array of the parser object below;
 * the RAPTOR_*_URI() accessor macros use indexes 0 to 21.
 */
#define RAPTOR_RDFXML_N_CONCEPTS 22
/*
* Raptor parser object
*/
struct raptor_rdfxml_parser_s {
/* SAX2 object that the element, cdata, comment and namespace handlers
 * are registered with in raptor_rdfxml_parse_init()
 */
raptor_sax2 *sax2;
/* stack of elements - elements add after current_element
 *
 * root_element is the bottom of the stack and current_element the top;
 * both are maintained by raptor_rdfxml_element_push() and
 * raptor_rdfxml_element_pop().
 */
raptor_rdfxml_element *root_element;
raptor_rdfxml_element *current_element;
/* URIs of RDF and DAML terms, accessed via the RAPTOR_*_URI() macros */
raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS];
/* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */
raptor_id_set* id_set;
void *xml_content;
size_t xml_content_length;
raptor_iostream* iostream;
/* writer for building parseType="Literal" content */
raptor_xml_writer* xml_writer;
};
/* Accessors for the entries of the concepts[] array of a
 * raptor_rdfxml_parser.
 *
 * Each macro expands to an lvalue, so it can be used both to read and to
 * assign the entry.  The argument and the whole expansion are
 * parenthesised so that any pointer expression can be passed (for
 * example the address of an object) and the result can be used inside a
 * larger expression without precedence surprises.
 */
#define RAPTOR_RDF_type_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[0])
#define RAPTOR_RDF_value_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[1])
#define RAPTOR_RDF_subject_URI(rdf_xml_parser)     ((rdf_xml_parser)->concepts[2])
#define RAPTOR_RDF_predicate_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[3])
#define RAPTOR_RDF_object_URI(rdf_xml_parser)      ((rdf_xml_parser)->concepts[4])
#define RAPTOR_RDF_Statement_URI(rdf_xml_parser)   ((rdf_xml_parser)->concepts[5])
#define RAPTOR_RDF_Seq_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[6])
#define RAPTOR_RDF_Bag_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[7])
#define RAPTOR_RDF_Alt_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[8])
#define RAPTOR_RDF_List_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[9])
#define RAPTOR_RDF_first_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[10])
#define RAPTOR_RDF_rest_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[11])
#define RAPTOR_RDF_nil_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[12])
#define RAPTOR_DAML_NS_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[13])
#define RAPTOR_DAML_List_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[14])
#define RAPTOR_DAML_first_URI(rdf_xml_parser)      ((rdf_xml_parser)->concepts[15])
#define RAPTOR_DAML_rest_URI(rdf_xml_parser)       ((rdf_xml_parser)->concepts[16])
#define RAPTOR_DAML_nil_URI(rdf_xml_parser)        ((rdf_xml_parser)->concepts[17])
#define RAPTOR_RDF_RDF_URI(rdf_xml_parser)         ((rdf_xml_parser)->concepts[18])
#define RAPTOR_RDF_Description_URI(rdf_xml_parser) ((rdf_xml_parser)->concepts[19])
#define RAPTOR_RDF_li_URI(rdf_xml_parser)          ((rdf_xml_parser)->concepts[20])
#define RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)  ((rdf_xml_parser)->concepts[21])
/* RAPTOR_RDFXML_N_CONCEPTS defines size of array */
/* prototypes for element functions */
/* element stack handling: pop returns the removed top element or NULL
 * when the stack is empty; push makes the element the new top
 */
static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser);
static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element);
static int raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id);
/* prototypes for grammar functions */
/* these are entered from the SAX2 start element, end element and
 * characters / cdata handlers respectively
 */
static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element);
static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata);
/* prototype for statement related functions */
static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_uri *subject_uri, const unsigned char *subject_id, const raptor_identifier_type subject_type, const raptor_uri_source subject_uri_source, raptor_uri *predicate_uri, const unsigned char *predicate_id, const raptor_identifier_type predicate_type, const raptor_uri_source predicate_uri_source, int predicate_ordinal, raptor_uri *object_uri, const unsigned char *object_id, const raptor_identifier_type object_type, const raptor_uri_source object_uri_source, raptor_uri *literal_datatype, raptor_identifier *reified, raptor_rdfxml_element *bag_element);
/* Prototypes for parsing data functions */
static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name);
static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser);
static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser);
static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end);
static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser);
static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser);
/*
 * raptor_rdfxml_element_pop - remove the top element from the element stack
 * @rdf_xml_parser: RDF/XML parser context
 *
 * The parent of the removed element becomes the new top of the stack.
 * The element is only unlinked, not freed.
 *
 * Return value: the removed element or NULL if the stack was empty
 */
static raptor_rdfxml_element*
raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser)
{
  raptor_rdfxml_element *top=rdf_xml_parser->current_element;

  if(top) {
    rdf_xml_parser->current_element=top->parent;

    /* removing the root leaves the stack without a root */
    if(top == rdf_xml_parser->root_element)
      rdf_xml_parser->root_element=NULL;
  }

  return top;
}
/*
 * raptor_rdfxml_element_push - add an element to the top of the element stack
 * @rdf_xml_parser: RDF/XML parser context
 * @element: element to push
 *
 * The previous top of the stack becomes the parent of @element.  The
 * first element pushed onto an empty stack is also recorded as the root.
 */
static void
raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element)
{
  if(rdf_xml_parser->root_element == NULL)
    rdf_xml_parser->root_element=element;

  element->parent=rdf_xml_parser->current_element;
  rdf_xml_parser->current_element=element;
}
/*
 * raptor_free_rdfxml_element - release an element and what it holds
 * @element: element to free
 *
 * Frees the stored RDF attribute values, the five embedded identifiers,
 * the collection tail ID and the literal datatype URI (where set) and
 * then the element structure itself.
 */
static void
raptor_free_rdfxml_element(raptor_rdfxml_element *element)
{
  int attr_index;

  /* Free special RDF M&S attributes */
  for(attr_index=0; attr_index <= RDF_ATTR_LAST; attr_index++) {
    const unsigned char *attr_value=element->rdf_attr[attr_index];

    if(attr_value != NULL)
      RAPTOR_FREE(cstring, (void*)attr_value);
  }

  /* identifiers are embedded in the element, not separately allocated */
  raptor_free_identifier(&element->subject);
  raptor_free_identifier(&element->predicate);
  raptor_free_identifier(&element->object);
  raptor_free_identifier(&element->bag);
  raptor_free_identifier(&element->reified);

  if(element->tail_id != NULL)
    RAPTOR_FREE(cstring, (char*)element->tail_id);

  if(element->object_literal_datatype != NULL)
    raptor_free_uri(element->object_literal_datatype);

  RAPTOR_FREE(raptor_rdfxml_element, element);
}
/*
 * raptor_rdfxml_sax2_new_namespace_handler - SAX2 handler for a namespace declaration
 * @user_data: the raptor_parser
 * @nspace: namespace that was declared
 *
 * Passes the namespace on to raptor_parser_start_namespace() and then
 * checks the namespace URI (if there is one) against the RDF namespace
 * URI:
 *  - a URI equal to the RDF namespace URI minus its final character
 *    gives a warning, since it is probably a typing mistake;
 *  - a longer URI that begins with the RDF namespace URI gives an error.
 */
static void
raptor_rdfxml_sax2_new_namespace_handler(void *user_data,
                                         raptor_namespace* nspace)
{
  raptor_parser* rdf_parser;
  const unsigned char* namespace_name;
  size_t namespace_name_len;
  raptor_uri* uri=raptor_namespace_get_uri(nspace);

  rdf_parser=(raptor_parser*)user_data;
  raptor_parser_start_namespace(rdf_parser, nspace);

  /* nothing to check when the namespace has no URI */
  if(!uri)
    return;

  namespace_name=raptor_uri_as_counted_string(uri, &namespace_name_len);

  if(namespace_name_len == raptor_rdf_namespace_uri_len-1 &&
     !strncmp((const char*)namespace_name,
              (const char*)raptor_rdf_namespace_uri,
              namespace_name_len)) {
    const unsigned char *prefix=raptor_namespace_get_prefix(nspace);

    /* NOTE(review): the prefix is presumably NULL for a declaration
     * without a prefix (xmlns="...") - confirm against the namespace API.
     * Passing NULL for a %s conversion is undefined, so substitute a
     * printable placeholder.
     */
    if(!prefix)
      prefix=(const unsigned char*)"(none)";

    raptor_parser_warning(rdf_parser, "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.", prefix, namespace_name);
  }

  if(namespace_name_len > raptor_rdf_namespace_uri_len &&
     !strncmp((const char*)namespace_name,
              (const char*)raptor_rdf_namespace_uri,
              raptor_rdf_namespace_uri_len)) {
    raptor_parser_error(rdf_parser, "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.", namespace_name);
  }
}
/*
 * raptor_rdfxml_start_element_handler - SAX2 handler for the start of an XML element
 * @user_data: the raptor_parser
 * @xml_element: XML element that was started
 *
 * Does nothing if the parser has already failed.  Otherwise:
 *  1. allocates a new raptor_rdfxml_element for @xml_element and pushes
 *     it on the element stack;
 *  2. moves the values of recognised RDF attributes out of the XML
 *     attribute list into element->rdf_attr[] and gives the XML element
 *     a new attribute array holding only the remaining attributes;
 *  3. takes the starting state and content type from the parent element
 *     (if any), detecting children of an element that already has an
 *     object and mixed element / cdata content;
 *  4. rejects elements and attributes without a namespace when RDF
 *     processing applies, and elements using rdf:aboutEach /
 *     rdf:aboutEachPrefix;
 *  5. enters the grammar via raptor_rdfxml_start_element_grammar().
 */
static void
raptor_rdfxml_start_element_handler(void *user_data,
                                    raptor_xml_element* xml_element)
{
  raptor_parser* rdf_parser;
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;
  int ns_attributes_count=0;
  raptor_qname** named_attrs=NULL;
  int i;
  /* set when the parent's content_element_seen was incremented for this
   * element, so that it can be undone if the element is then skipped
   */
  int count_bumped=0;

  rdf_parser=(raptor_parser*)user_data;
  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;

  if(rdf_parser->failed)
    return;

  raptor_rdfxml_update_document_locator(rdf_parser);

  /* Create new element structure */
  element=(raptor_rdfxml_element*)RAPTOR_CALLOC(raptor_rdfxml_element, 1,
                                                sizeof(raptor_rdfxml_element));
  if(!element) {
    raptor_parser_fatal_error(rdf_parser, "Out of memory");
    rdf_parser->failed=1;
    return;
  }

  element->xml_element=xml_element;

  raptor_rdfxml_element_push(rdf_xml_parser, element);

  named_attrs=raptor_xml_element_get_attributes(xml_element);
  ns_attributes_count=raptor_xml_element_get_attributes_count(xml_element);

  /* RDF-specific processing of attributes */
  if(ns_attributes_count) {
    raptor_qname** new_named_attrs;
    int offset = 0;
    raptor_rdfxml_element* parent_element;

    parent_element=element->parent;

    /* Allocate new array to move namespaced-attributes to if
     * rdf processing is performed
     */
    new_named_attrs=(raptor_qname**)RAPTOR_CALLOC(raptor_qname_array,
                                                  ns_attributes_count,
                                                  sizeof(raptor_qname*));
    if(!new_named_attrs) {
      raptor_parser_fatal_error(rdf_parser, "Out of memory");
      rdf_parser->failed=1;
      return;
    }

    for (i = 0; i < ns_attributes_count; i++) {
      raptor_qname* attr=named_attrs[i];

      /* If:
       *  1 We are handling RDF content and RDF processing is allowed on
       *    this element
       * OR
       *  2 We are not handling RDF content and
       *    this element is at the top level (top level Desc. / typedNode)
       *    i.e. we have no parent
       * then handle the RDF attributes
       */
      if((parent_element &&
          rdf_content_type_info[parent_element->child_content_type].rdf_processing) ||
         !parent_element) {

        /* Save pointers to some RDF M&S attributes */

        /* If RDF namespace-prefixed attributes */
        if(attr->nspace && attr->nspace->is_rdf_ms) {
          const unsigned char *attr_name=attr->local_name;
          int j;

          for(j=0; j<= RDF_ATTR_LAST; j++)
            if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) {
              /* the element takes over ownership of the value string */
              element->rdf_attr[j]=attr->value;
              element->rdf_attr_count++;
              /* Delete it if it was stored elsewhere */
#ifdef RAPTOR_DEBUG_VERBOSE
              RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n", (char*)attr_name, attr->value);
#endif
              /* make sure value isn't deleted from qname structure */
              attr->value=NULL;
              raptor_free_qname(attr);
              attr=NULL;
              break;
            }
        } /* end if RDF namespaced-prefixed attributes */

        if(!attr)
          continue;

        /* If non namespace-prefixed RDF attributes found on an element */
        if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_NON_NS_ATTRIBUTES] &&
           !attr->nspace) {
          const unsigned char *attr_name=attr->local_name;
          int j;

          for(j=0; j<= RDF_ATTR_LAST; j++)
            if(!strcmp((const char*)attr_name, rdf_syntax_terms_info[j].name)) {
              /* the element takes over ownership of the value string */
              element->rdf_attr[j]=attr->value;
              element->rdf_attr_count++;
              if(!rdf_syntax_terms_info[j].allowed_unprefixed_on_attribute)
                raptor_parser_warning(rdf_parser, "Using rdf attribute '%s' without the RDF namespace has been deprecated.", attr_name);
              /* Delete it if it was stored elsewhere */
              /* make sure value isn't deleted from qname structure */
              attr->value=NULL;
              raptor_free_qname(attr);
              attr=NULL;
              break;
            }
        } /* end if non-namespace prefixed RDF attributes */

        if(!attr)
          continue;

      } /* end if leave literal XML alone */

      /* not consumed above: keep it as an ordinary XML attribute */
      if(attr)
        new_named_attrs[offset++]=attr;
    }

    /* new attribute count is set from attributes that haven't been skipped */
    ns_attributes_count=offset;
    if(!ns_attributes_count) {
      /* all attributes were deleted so delete the new array */
      RAPTOR_FREE(raptor_qname_array, new_named_attrs);
      new_named_attrs=NULL;
    }

    /* replace the XML element's attribute array with the filtered one */
    RAPTOR_FREE(raptor_qname_array, named_attrs);
    named_attrs=new_named_attrs;
    raptor_xml_element_set_attributes(xml_element,
                                      named_attrs, ns_attributes_count);
  } /* end if ns_attributes_count */

  /* start from unknown; if we have a parent, it may set this */
  element->state=RAPTOR_STATE_UNKNOWN;
  element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN;

  if(element->parent &&
     element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) {
    element->content_type=element->parent->child_content_type;

    if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE &&
       element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
       element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
      /* If parent has an rdf:resource, this element should not be here */
      raptor_parser_error(rdf_parser, "property element '%s' has multiple object node elements, skipping.",
                          raptor_xml_element_get_name(element->parent->xml_element)->local_name);
      element->state=RAPTOR_STATE_SKIPPING;
      element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;

    } else {
      if(!element->parent->child_state) {
        raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_handler: no parent element child_state set");
        return;
      }

      element->state=element->parent->child_state;
      element->parent->xml_element->content_element_seen++;
      count_bumped++;

      /* leave literal XML alone */
      if (!rdf_content_type_info[element->content_type].cdata_allowed) {
        if(element->parent->xml_element->content_element_seen &&
           element->parent->xml_element->content_cdata_seen) {
          /* Uh oh - mixed content, the parent element has cdata too */
          raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
                                raptor_xml_element_get_name(element->parent->xml_element)->local_name);
        }

        /* If there is some existing all-whitespace content cdata
         * before this node element, delete it
         */
        if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES &&
           element->parent->xml_element->content_element_seen &&
           element->parent->content_cdata_all_whitespace &&
           element->parent->xml_element->content_cdata_length) {

          element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;

          raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb);
          element->parent->xml_element->content_cdata_sb=NULL;
          element->parent->xml_element->content_cdata_length=0;
        }

      } /* end if leave literal XML alone */

    } /* end if parent has no rdf:resource */

  } /* end if element->parent */

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Using content type %s\n", rdf_content_type_info[element->content_type].name);

  fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: ");
  raptor_print_xml_element(xml_element, stderr);
#endif

  /* Check for non namespaced stuff when not in a parseType literal, other */
  if (rdf_content_type_info[element->content_type].rdf_processing) {

    /* The element */

    /* If has no namespace or the namespace has no name (xmlns="") */
    if(!raptor_xml_element_get_name(xml_element)->nspace ||
       (raptor_xml_element_get_name(xml_element)->nspace &&
        !raptor_namespace_get_uri(raptor_xml_element_get_name(xml_element)->nspace))) {
      /* Report the name of the offending element itself (not that of
       * its parent, which has already been accepted).
       */
      raptor_parser_error(rdf_parser, "Using an element '%s' without a namespace is forbidden.",
                          raptor_xml_element_get_name(xml_element)->local_name);
      element->state=RAPTOR_STATE_SKIPPING;
      /* Remove count above so that parent thinks this is empty */
      if(count_bumped)
        element->parent->xml_element->content_element_seen--;
      element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
    }

    /* Check for any remaining non-namespaced attributes */
    if (named_attrs) {
      for(i=0; i < ns_attributes_count; i++) {
        raptor_qname *attr=named_attrs[i];

        /* Check if any attributes are non-namespaced */
        if(!attr->nspace ||
           (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) {
          raptor_parser_error(rdf_parser, "Using an attribute '%s' without a namespace is forbidden.", attr->local_name);
          /* the slot is left NULL in the XML element's attribute array */
          raptor_free_qname(attr);
          named_attrs[i]=NULL;
        }
      }
    }
  }

  if (element->rdf_attr[RDF_ATTR_aboutEach] ||
      element->rdf_attr[RDF_ATTR_aboutEachPrefix]) {
    raptor_parser_warning(rdf_parser, "element '%s' has aboutEach / aboutEachPrefix, skipping.",
                          raptor_xml_element_get_name(xml_element)->local_name);
    element->state=RAPTOR_STATE_SKIPPING;
    /* Remove count above so that parent thinks this is empty */
    if(count_bumped)
      element->parent->xml_element->content_element_seen--;
    element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
  }

  /* Right, now ready to enter the grammar */
  raptor_rdfxml_start_element_grammar(rdf_parser, element);

  return;
}
/*
 * raptor_rdfxml_end_element_handler - SAX2 handler for the end of an XML element
 * @user_data: the raptor_parser
 * @xml_element: XML element that ended
 *
 * Runs the end-of-element grammar for the current element (unless the
 * parser has failed), then pops that element off the stack, hands its
 * final state to the parent as the state for the next child where
 * appropriate, and frees it.
 */
static void
raptor_rdfxml_end_element_handler(void *user_data,
                                  raptor_xml_element* xml_element)
{
  raptor_parser* rdf_parser=(raptor_parser*)user_data;
  raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_rdfxml_element* finished;

  if(!rdf_parser->failed) {
    raptor_rdfxml_update_document_locator(rdf_parser);

    raptor_rdfxml_end_element_grammar(rdf_parser, rdf_xml_parser->current_element);
  }

  /* the stack is unwound even after a failure */
  finished=raptor_rdfxml_element_pop(rdf_xml_parser);
  if(!finished)
    return;

  if(finished->parent) {
    /* Do not change this; PROPERTYELT will turn into MEMBER if necessary
     * See the switch case for MEMBER / PROPERTYELT where the test is done.
     *
     * PARSETYPE_RESOURCE should never be propagated up since it
     * will turn the next child (node) element into a property
     */
    int keep_parent_child_state=
      (finished->state == RAPTOR_STATE_MEMBER_PROPERTYELT ||
       finished->state == RAPTOR_STATE_PARSETYPE_RESOURCE);

    if(!keep_parent_child_state)
      finished->parent->child_state=finished->state;
  }

  raptor_free_rdfxml_element(finished);
}
/* SAX2 handler for character data (and ignorable whitespace for libxml).
 *
 * s is 0 terminated when it comes from libxml but not when it comes
 * from expat, so only len bytes of it may be relied on.
 *
 * The text is passed to the cdata grammar with the is_cdata flag clear.
 */
static void
raptor_rdfxml_characters_handler(void *user_data,
                                 raptor_xml_element* xml_element,
                                 const unsigned char *s, int len)
{
  raptor_rdfxml_cdata_grammar((raptor_parser*)user_data, s, len, 0);
}
/* SAX2 handler for cdata (and ignorable whitespace for libxml).
 *
 * s is 0 terminated when it comes from libxml but not when it comes
 * from expat, so only len bytes of it may be relied on.
 *
 * The text is passed to the cdata grammar with the is_cdata flag set.
 */
static void
raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element,
                            const unsigned char *s, int len)
{
  raptor_rdfxml_cdata_grammar((raptor_parser*)user_data, s, len, 1);
}
/* SAX2 handler for an XML comment.
 *
 * s is 0 terminated.
 *
 * Comments are ignored unless the children of the current element are
 * being collected as an XML literal, in which case the comment is
 * written to the parser's XML writer.
 */
static void
raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element,
                              const unsigned char *s)
{
  raptor_parser* rdf_parser=(raptor_parser*)user_data;
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* current;

  if(rdf_parser->failed)
    return;
  if(!xml_element)
    return;

  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  current=rdf_xml_parser->current_element;

  if(current &&
     current->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
    raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s);

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("XML Comment '%s'\n", s);
#endif
}
/* Set up the RDF/XML parser context.
 *
 * Creates the SAX2 object and registers this file's element, text,
 * comment and namespace callbacks on it, allocates the RDF and DAML+OIL
 * concept URIs and finally creates the ID set.
 *
 * Returns 0 on success, 1 if any of the allocations failed.  Nothing
 * that was already allocated is released here on failure.
 * The name argument is not used.
 */
static int
raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name)
{
  raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_sax2* sax2;
  raptor_uri* daml_ns;

  /* SAX2 object; stored in the context even when creation failed */
  sax2=raptor_new_sax2(rdf_parser, &rdf_parser->error_handlers);
  rdf_xml_parser->sax2=sax2;
  if(!sax2)
    return 1;

  /* SAX2 callbacks */
  raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler);
  raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler);
  raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler);
  raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler);
  raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler);
  raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler);

  /* rdf: concept URIs */
  RAPTOR_RDF_type_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("type");
  RAPTOR_RDF_value_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("value");
  RAPTOR_RDF_subject_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("subject");
  RAPTOR_RDF_predicate_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("predicate");
  RAPTOR_RDF_object_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("object");
  RAPTOR_RDF_Statement_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Statement");

  RAPTOR_RDF_Seq_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Seq");
  RAPTOR_RDF_Bag_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Bag");
  RAPTOR_RDF_Alt_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Alt");

  RAPTOR_RDF_List_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("List");
  RAPTOR_RDF_first_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("first");
  RAPTOR_RDF_rest_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("rest");
  RAPTOR_RDF_nil_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("nil");

  /* DAML+OIL namespace URI and the terms built from it */
  daml_ns=raptor_new_uri((const unsigned char*)"http://www.daml.org/2001/03/daml+oil#");
  RAPTOR_DAML_NS_URI(rdf_xml_parser)=daml_ns;
  RAPTOR_DAML_List_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(daml_ns, (const unsigned char *)"List");
  RAPTOR_DAML_first_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(daml_ns, (const unsigned char *)"first");
  RAPTOR_DAML_rest_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(daml_ns, (const unsigned char *)"rest");
  RAPTOR_DAML_nil_URI(rdf_xml_parser)=raptor_new_uri_from_uri_local_name(daml_ns, (const unsigned char *)"nil");

  RAPTOR_RDF_RDF_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("RDF");
  RAPTOR_RDF_Description_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("Description");
  RAPTOR_RDF_li_URI(rdf_xml_parser)=raptor_new_uri_for_rdf_concept("li");
  RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser)=raptor_new_uri(raptor_xml_literal_datatype_uri_string);

  /* Any missing URI means an allocation failed: rdf: terms first ... */
  if(!RAPTOR_RDF_type_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_value_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_subject_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_predicate_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_object_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_Statement_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_Seq_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_Bag_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_Alt_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_List_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_first_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_rest_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_nil_URI(rdf_xml_parser))
    return 1;

  /* ... then the DAML+OIL terms ... */
  if(!RAPTOR_DAML_NS_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_List_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_first_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_rest_URI(rdf_xml_parser) ||
     !RAPTOR_DAML_nil_URI(rdf_xml_parser))
    return 1;

  /* ... and the remaining rdf: terms */
  if(!RAPTOR_RDF_RDF_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_Description_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_li_URI(rdf_xml_parser) ||
     !RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser))
    return 1;

  /* ID set object */
  rdf_xml_parser->id_set=raptor_new_id_set();

  return rdf_xml_parser->id_set ? 0 : 1;
}
/* Begin a parse: copy the relevant parser features to the SAX2 object
 * and start it on the parser's base URI.
 *
 * Returns 1 when the parser has no base URI (RDF/XML requires one),
 * otherwise 0.
 */
static int
raptor_rdfxml_parse_start(raptor_parser* rdf_parser)
{
  raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_uri *base=rdf_parser->base_uri;

  if(!base)
    return 1;

  /* Optional lowercase normalization of language tags, see
   * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier
   */
  raptor_sax2_set_feature(rdf_xml_parser->sax2,
                          RAPTOR_FEATURE_NORMALIZE_LANGUAGE,
                          rdf_parser->features[RAPTOR_FEATURE_NORMALIZE_LANGUAGE]);

  /* Optional ban on network requests made by the XML parser */
  raptor_sax2_set_feature(rdf_xml_parser->sax2,
                          RAPTOR_FEATURE_NO_NET,
                          rdf_parser->features[RAPTOR_FEATURE_NO_NET]);

  raptor_sax2_parse_start(rdf_xml_parser->sax2, base);

  return 0;
}
/* Release everything held by the RDF/XML parser context: the SAX2
 * object, any elements still on the element stack, the concept URIs
 * and the ID set.  Each pointer held in the context is reset to NULL
 * after it has been freed.
 */
static void
raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser) 
{
  raptor_rdfxml_parser* rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_rdfxml_element* leftover;
  int n;

  if(rdf_xml_parser->sax2) {
    raptor_free_sax2(rdf_xml_parser->sax2);
    rdf_xml_parser->sax2=NULL;
  }

  /* Drain the element stack */
  for(;;) {
    leftover=raptor_rdfxml_element_pop(rdf_xml_parser);
    if(!leftover)
      break;
    raptor_free_rdfxml_element(leftover);
  }

  for(n=0; n < RAPTOR_RDFXML_N_CONCEPTS; n++) {
    if(!rdf_xml_parser->concepts[n])
      continue;
    raptor_free_uri(rdf_xml_parser->concepts[n]);
    rdf_xml_parser->concepts[n]=NULL;
  }

  if(rdf_xml_parser->id_set) {
    raptor_free_id_set(rdf_xml_parser->id_set);
    rdf_xml_parser->id_set=NULL;
  }
}
/* Score how likely some content is to be RDF/XML.
 *
 * The score is built up from up to four hints, each of which may be
 * NULL / absent: the file name suffix, the identifier, the mime type
 * and the start of the content itself.  A larger result means a better
 * match; the result can be negative (html mime type).
 *
 * buffer is searched with strstr() so it has to be 0 terminated; len
 * is only tested for being non-zero.  The factory argument is not used.
 */
static int
raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory, 
                                     const unsigned char *buffer, size_t len,
                                     const unsigned char *identifier, 
                                     const unsigned char *suffix, 
                                     const char *mime_type)
{
  /* Suffixes that strongly suggest RDF/XML */
  static const char * const rdf_suffixes[]={
    "rdf", "rdfs", "foaf", "doap", "owl", "daml", NULL
  };
  /* Ways the rdf: namespace can be declared in XML - as an xmlns
   * attribute or via an entity - which N3 or Turtle content that
   * merely mentions the namespace URI does not match.
   */
  static const char * const rdf_ns_markers[]={
    "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#",
    "<!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'>",
    "<!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">",
    "xmlns:rdf=\"&rdf;\"",
    "xmlns:rdf='&rdf;'",
    NULL
  };
  const char *content=(const char*)buffer;
  int score=0;
  int i;

  if(suffix) {
    const char *ext=(const char*)suffix;

    for(i=0; rdf_suffixes[i]; i++) {
      if(!strcmp(ext, rdf_suffixes[i])) {
        score=9;
        break;
      }
    }
    if(!strcmp(ext, "rss"))
      score=3;
  }

  if(identifier) {
    const char *ident=(const char*)identifier;

    if(strstr(ident, "rss1"))
      score+=5;
    else if(!suffix) {
      /* The weaker identifier hints only count without a suffix */
      if(strstr(ident, "rss"))
        score+=3;
      else if(strstr(ident, "rdf") || strstr(ident, "RDF"))
        score+=2;
    }
  }

  if(mime_type) {
    if(strstr(mime_type, "html"))
      score-=4;
    else if(!strcmp(mime_type, "text/rdf"))
      score+=7;
    else if(!strcmp(mime_type, "application/xml"))
      score+=5;
  }

  /* Content check: skipped for anything that looks like (X)HTML */
  if(buffer && len &&
     !strstr(content, "http://www.w3.org/1999/xhtml") &&
     !strstr(content, "<html")) {
    int declares_rdf_ns=0;

    for(i=0; rdf_ns_markers[i] && !declares_rdf_ns; i++)
      declares_rdf_ns=(strstr(content, rdf_ns_markers[i]) != NULL);

    if(declares_rdf_ns) {
      score+=7;

      /* One extra point for each common RDF/XML construct seen */
      if(strstr(content, "<rdf:RDF"))
        score++;
      if(strstr(content, "rdf:Description"))
        score++;
      if(strstr(content, "rdf:about"))
        score++;
    }
  }

  return score;
}
/* Feed len bytes at buffer to the SAX2 parser; is_end is passed through
 * unchanged.
 *
 * Returns 1 without parsing if the parser has already failed, otherwise
 * whatever raptor_sax2_parse_chunk() returns.
 */
static int
raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer,
                          size_t len, int is_end) 
{
  raptor_rdfxml_parser* rdf_xml_parser;

  if(rdf_parser->failed)
    return 1;

  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  return raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end);
}
/**
 * raptor_rdfxml_generate_statement:
 * @rdf_parser: Raptor parser object
 * @subject_uri: subject URI, or NULL to use @subject_id
 * @subject_id: subject identifier used when @subject_uri is NULL
 * @subject_type: type of the subject
 * @subject_uri_source: not used by this function
 * @predicate_uri: predicate URI (ignored for ordinal predicates)
 * @predicate_id: predicate identifier (ignored for ordinal predicates)
 * @predicate_type: type of the predicate; for
 *   RAPTOR_IDENTIFIER_TYPE_ORDINAL a URI is built from @predicate_ordinal
 * @predicate_uri_source: not used by this function
 * @predicate_ordinal: ordinal used when @predicate_type is ordinal
 * @object_uri: object URI or literal string, or NULL to use @object_id
 * @object_id: object identifier used when @object_uri is NULL
 * @object_type: type of the object
 * @object_uri_source: not used by this function
 * @literal_datatype: datatype URI of a literal object or NULL
 * @reified: identifier to reify the statement as, or NULL
 * @bag_element: element whose bag (if any) the statement is added to
 *   when the RAPTOR_FEATURE_ALLOW_BAGID feature is set, or NULL
 *
 * Fill in the parser's statement and pass it to the statement handler.
 *
 * Nothing is emitted if the parser has failed or has no statement
 * handler.  After the statement itself, a bag membership statement is
 * emitted when bagID handling applies, followed by the four
 * reification statements (rdf:type rdf:Statement, rdf:subject,
 * rdf:predicate, rdf:object) when the statement is reified or was
 * added to a bag.
 *
 * A plain or XML literal object without a datatype picks up the
 * in-scope xml:lang, and a NULL literal becomes the empty string.
 **/
static void
raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, 
                                 raptor_uri *subject_uri,
                                 const unsigned char *subject_id,
                                 const raptor_identifier_type subject_type,
                                 const raptor_uri_source subject_uri_source,
                                 raptor_uri *predicate_uri,
                                 const unsigned char *predicate_id,
                                 raptor_identifier_type predicate_type,
                                 const raptor_uri_source predicate_uri_source,
                                 int predicate_ordinal,
                                 raptor_uri *object_uri,
                                 const unsigned char *object_id,
                                 const raptor_identifier_type object_type,
                                 const raptor_uri_source object_uri_source,
                                 raptor_uri *literal_datatype,
                                 raptor_identifier *reified,
                                 raptor_rdfxml_element* bag_element)
{
  raptor_statement *statement=&rdf_parser->statement;
  const unsigned char *language=NULL;
  static const char empty_literal[1]="";
  raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
  char *reified_id=NULL;
  raptor_uri* uri1=NULL;
  raptor_uri* uri2=NULL;

  if(rdf_parser->failed)
    return;

  if((object_type == RAPTOR_IDENTIFIER_TYPE_LITERAL ||
      object_type == RAPTOR_IDENTIFIER_TYPE_XML_LITERAL) &&
     !literal_datatype) {
    language=raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2);
    if(!object_uri)
      object_uri=(raptor_uri*)empty_literal;
  }

  statement->subject=subject_uri ? (void*)subject_uri : (void*)subject_id;
  statement->subject_type=subject_type;

  /* The predicate is always handed over as a URI: an ordinal is turned
   * into its URI here and that URI is freed again at generate_tidy
   */
  statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
  if(predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) {
    /* new URI object */
    uri1=raptor_new_uri_from_rdf_ordinal(predicate_ordinal);
    predicate_uri=uri1;
    predicate_id=NULL;
  }
  statement->predicate=predicate_uri;

  statement->object=object_uri ? (void*)object_uri : (void*)object_id;
  statement->object_type=object_type;

  statement->object_literal_language=language;
  statement->object_literal_datatype=literal_datatype;


#ifdef RAPTOR_DEBUG_VERBOSE
  fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: ");
  raptor_print_statement(statement, stderr);
  fputc('\n', stderr);

  if(!(subject_uri||subject_id))
    RAPTOR_FATAL1("Statement has no subject\n");
  
  if(!(predicate_uri||predicate_id))
    RAPTOR_FATAL1("Statement has no predicate\n");
  
  if(!(object_uri||object_id))
    RAPTOR_FATAL1("Statement has no object\n");
  
#endif

  if(!rdf_parser->statement_handler)
    goto generate_tidy;

  /* Generate the statement; or is it fact? */
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);


  /* the bagID mess */
  if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID] &&
     bag_element && (bag_element->bag.uri || bag_element->bag.id)) {
    raptor_identifier* bag=&bag_element->bag;
    
    statement->subject=bag->uri ? (void*)bag->uri : (void*)bag->id;
    statement->subject_type=bag->type;

    bag_element->last_bag_ordinal++;

    /* new URI object */
    uri2=raptor_new_uri_from_rdf_ordinal(bag_element->last_bag_ordinal);
    statement->predicate=uri2;

    if(reified && (reified->uri || reified->id)) {
      statement->object=reified->uri ? (void*)reified->uri : (void*)reified->id;
      statement->object_type=reified->type;
    } else {
      /* reified may be NULL so do not use it */
      reified_id=(char*)raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
      if(!reified_id)
        /* No identifier could be generated.  Stop here: carrying on
         * would emit a statement with a NULL object and the
         * reification code below would fall back to using reified,
         * which may be NULL on this path.
         */
        goto generate_tidy;
      statement->object=reified_id;
      statement->object_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
    }
    
    (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  } else if(!reified || (!reified->uri && !reified->id))
    goto generate_tidy;

  /* generate reified statements */
  statement->subject_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
  statement->predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;

  statement->object_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
  statement->object_literal_language=NULL;

  if(reified_id) {
    /* reified may be NULL so do not use it */
    statement->subject=reified_id;
    statement->subject_type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
  } else {
    statement->subject=reified->uri ? (void*)reified->uri : (void*)reified->id;
    statement->subject_type=reified->type;
  }
  

  statement->predicate=RAPTOR_RDF_type_URI(rdf_xml_parser);
  statement->object=RAPTOR_RDF_Statement_URI(rdf_xml_parser);
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  statement->predicate=RAPTOR_RDF_subject_URI(rdf_xml_parser);
  statement->object=subject_uri ? (void*)subject_uri : (void*)subject_id;
  statement->object_type=subject_type;
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  statement->predicate=RAPTOR_RDF_predicate_URI(rdf_xml_parser);
  statement->object=predicate_uri ? (void*)predicate_uri : (void*)predicate_id;
  /* An ordinal predicate was replaced by its URI (uri1) above, so the
   * value stored here is a URI and has to be typed as a resource, the
   * same way the predicate of the main statement was.
   */
  statement->object_type=(predicate_type == RAPTOR_IDENTIFIER_TYPE_ORDINAL) ? RAPTOR_IDENTIFIER_TYPE_RESOURCE : predicate_type;
  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

  statement->predicate=RAPTOR_RDF_object_URI(rdf_xml_parser);
  statement->object=object_uri ? (void*)object_uri : (void*)object_id;
  statement->object_type=object_type;
  statement->object_literal_language=language;

  (*rdf_parser->statement_handler)(rdf_parser->user_data, statement);

 generate_tidy:
  /* Tidy up things allocated here */
  if(reified_id)
    RAPTOR_FREE(cstring, reified_id);
  if(uri1)
    raptor_free_uri(uri1);
  if(uri2)
    raptor_free_uri(uri2);
}
/**
 * raptor_rdfxml_element_has_property_attributes:
 * @element: element with the property attributes 
 * 
 * Test whether the element carries any property attribute: either an
 * ordinary XML attribute on its xml_element, or an rdf: attribute whose
 * syntax term has a known identifier type.
 *
 * Return value: 1 if at least one property attribute is present, else 0.
 **/
static int
raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element) 
{
  int term;
  int found=(element->xml_element->attribute_count > 0);

  /* Only scan the rdf: attributes while nothing has been found yet */
  for(term=0; !found && term <= RDF_ATTR_LAST; term++)
    found=(element->rdf_attr[term] &&
           rdf_syntax_terms_info[term].type != RAPTOR_IDENTIFIER_TYPE_UNKNOWN);

  return found;
}
/**
 * raptor_rdfxml_process_property_attributes:
 * @rdf_parser: Raptor parser object
 * @attributes_element: element with the property attributes 
 * @resource_element: element that defines the resource URI 
 *                    subject_uri, subject_uri_source etc.
 * @property_node_identifier: Use this identifier for the resource URI
 *   and count any ordinals for it locally
 *
 * Process the property attributes for an element for a given resource.
 *
 * A statement with a literal object is generated for each namespaced
 * XML attribute of @attributes_element and then one statement for each
 * rdf: attribute still recorded on it.  The subject is
 * @property_node_identifier when given, otherwise the subject of
 * @resource_element.  Attributes that are rejected are reported with a
 * parser error or warning; attributes without a namespace, with a value
 * that is not in Unicode Normal Form C, or for which no URI could be
 * created generate no statement.
 **/
static void
raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser, 
                                          raptor_rdfxml_element *attributes_element,
                                          raptor_rdfxml_element *resource_element,
                                          raptor_identifier *property_node_identifier)
{
  unsigned int i;
  raptor_identifier *resource_identifier;

  resource_identifier=property_node_identifier ? property_node_identifier : &resource_element->subject;
  

  /* Process attributes as propAttr* = * (propName="string")*
   */
  for(i=0; i < attributes_element->xml_element->attribute_count; i++) {
    raptor_qname* attr=attributes_element->xml_element->attributes[i];
    const unsigned char *name;
    const unsigned char *value;
    int handled=0;

    if(!attr)
      continue;
    
    name=attr->local_name;
    value = attr->value;

    if(!attr->nspace) {
      raptor_rdfxml_update_document_locator(rdf_parser);
      raptor_parser_error(rdf_parser, "Using property attribute '%s' without a namespace is forbidden.", name);
      continue;
    }


    if(!raptor_utf8_is_nfc(value, strlen((const char*)value))) {
      const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s";
      raptor_rdfxml_update_document_locator(rdf_parser);
      if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
        raptor_parser_error(rdf_parser, message, name, value);
      else
        raptor_parser_warning(rdf_parser, message, name, value);
      continue;
    }
    

    /* Generate the property statement using one of these properties:
     * 1) rdf:_n
     * 2) the URI from the rdf:* attribute where allowed
     * 3) otherwise forbidden (including rdf:li)
     */
    if(attr->nspace->is_rdf_ms) {
      /* is rdf: namespace */
      int ordinal=0;
        
      if(*name == '_') {
        /* recognise rdf:_ */
        name++;
        ordinal=raptor_check_ordinal(name);
        if(ordinal < 1) {
          /* name now points past the '_' at the ordinal text, so the
           * containing element has to be named from the element itself
           */
          const unsigned char *element_name;

          element_name=raptor_xml_element_get_name(attributes_element->xml_element)->local_name;
          raptor_rdfxml_update_document_locator(rdf_parser);
          raptor_parser_error(rdf_parser, "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.", ordinal, attr->local_name, element_name);
          /* recover by treating the attribute as rdf:_1 */
          ordinal=1;
        }
      } else {
        raptor_rdfxml_update_document_locator(rdf_parser);
        if(raptor_rdfxml_forbidden_propertyAttribute_name((const char*)name) > 0)
          raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", name);
        else
          raptor_parser_warning(rdf_parser, "Unknown RDF namespace property attribute '%s'.", 
                                name);
      }

      if(ordinal >= 1) {
        /* Generate an ordinal property when there are no problems */
        raptor_rdfxml_generate_statement(rdf_parser, 
                                         resource_identifier->uri,
                                         resource_identifier->id,
                                         resource_identifier->type,
                                         resource_identifier->uri_source,
                                         
                                         NULL,
                                         NULL,
                                         RAPTOR_IDENTIFIER_TYPE_ORDINAL,
                                         RAPTOR_URI_SOURCE_NOT_URI,
                                         ordinal,
                                         
                                         (raptor_uri*)value,
                                         NULL,
                                         RAPTOR_IDENTIFIER_TYPE_LITERAL,
                                         RAPTOR_URI_SOURCE_NOT_URI,
                                         NULL,
                                         
                                         NULL, /* Property attributes are never reified*/
                                         resource_element);
        handled=1;
      }
      
    } /* end is RDF namespace property */


    if(!handled)
      /* else not rdf: namespace or unknown in rdf: namespace so
       * generate a statement with a literal object
       */
      raptor_rdfxml_generate_statement(rdf_parser, 
                                       resource_identifier->uri,
                                       resource_identifier->id,
                                       resource_identifier->type,
                                       resource_identifier->uri_source,
                                       
                                       attr->uri,
                                       NULL,
                                       RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                       RAPTOR_URI_SOURCE_ATTRIBUTE,
                                       0,
                                       
                                       (raptor_uri*)value,
                                       NULL,
                                       RAPTOR_IDENTIFIER_TYPE_LITERAL,
                                       RAPTOR_URI_SOURCE_NOT_URI,
                                       NULL,
                                       
                                       NULL, /* Property attributes are never reified*/
                                       resource_element);

  } /* end for ... attributes */


  /* Handle rdf property attributes
   * (only rdf:type and rdf:value at present) 
   */
  for(i=0; i<= RDF_ATTR_LAST; i++) {
    const unsigned char *value=attributes_element->rdf_attr[i];
    const char *term_name=rdf_syntax_terms_info[i].name;
    int object_is_literal=(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_LITERAL);
    raptor_uri *property_uri, *object_uri;
    raptor_identifier_type object_type;
    
    if(!value)
      continue;

    if(rdf_syntax_terms_info[i].type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
      if(raptor_rdfxml_forbidden_propertyAttribute_name(term_name)) {
        raptor_rdfxml_update_document_locator(rdf_parser);
        raptor_parser_error(rdf_parser, "RDF term %s is forbidden as a property attribute.", term_name);
        continue;
      }
    }

    if(object_is_literal && !raptor_utf8_is_nfc(value, strlen((const char*)value))) {
      const char *message="Property attribute '%s' has a string not in Unicode Normal Form C: %s";
      raptor_rdfxml_update_document_locator(rdf_parser);
      if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
        raptor_parser_error(rdf_parser, message, term_name, value);
      else
        raptor_parser_warning(rdf_parser, message, term_name, value);
      continue;
    }

    /* A literal object is the attribute value itself; a resource object
     * needs a new URI resolved against the in-scope base URI.  Either
     * URI constructor can fail, so check before using or freeing.
     */
    property_uri=raptor_new_uri_for_rdf_concept(term_name);
    object_uri=NULL;
    if(property_uri) {
      if(object_is_literal)
        object_uri=(raptor_uri*)value;
      else
        object_uri=raptor_new_uri_relative_to_base(raptor_rdfxml_inscope_base_uri(rdf_parser), value);
    }
    if(!property_uri || !object_uri) {
      raptor_rdfxml_update_document_locator(rdf_parser);
      raptor_parser_error(rdf_parser, "Failed to create a URI for RDF property attribute '%s'.", term_name);
      if(property_uri)
        raptor_free_uri(property_uri);
      continue;
    }
    object_type=object_is_literal ? RAPTOR_IDENTIFIER_TYPE_LITERAL : RAPTOR_IDENTIFIER_TYPE_RESOURCE;
    
    raptor_rdfxml_generate_statement(rdf_parser, 
                                     resource_identifier->uri,
                                     resource_identifier->id,
                                     resource_identifier->type,
                                     resource_identifier->uri_source,
                                     
                                     property_uri,
                                     NULL,
                                     RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                     RAPTOR_URI_SOURCE_ATTRIBUTE,
                                     0,
                                     
                                     object_uri,
                                     NULL,
                                     object_type,
                                     RAPTOR_URI_SOURCE_NOT_URI,
                                     NULL,
                                     
                                     NULL, /* Property attributes are never reified*/
                                     resource_element);
    /* Only a URI created here is freed; a literal is owned elsewhere */
    if(!object_is_literal)
      raptor_free_uri(object_uri);

    raptor_free_uri(property_uri);
    
  } /* end for rdf:property values */

}
static void
raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser,
raptor_rdfxml_element *element)
{
int finished;
raptor_state state;
raptor_xml_element* xml_element=element->xml_element;
const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name;
int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace &&
raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms);
raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;
int rc=0;
raptor_uri* base_uri;
state=element->state;
#ifdef RAPTOR_DEBUG_VERBOSE
RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
#endif
base_uri=raptor_rdfxml_inscope_base_uri(rdf_parser);
finished= 0;
while(!finished) {
switch(state) {
case RAPTOR_STATE_SKIPPING:
element->child_state=state;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED;
finished=1;
break;
case RAPTOR_STATE_UNKNOWN:
/* found <rdf:RDF> ? */
if(element_in_rdf_ns) {
if(raptor_uri_equals(raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) {
element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
/* Yes - need more content before can continue,
* so wait for another element
*/
finished=1;
break;
}
if(raptor_uri_equals(raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser))) {
state=RAPTOR_STATE_DESCRIPTION;
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
/* Yes - found something so move immediately to description */
break;
}
if(element_in_rdf_ns && (rc=raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) {
if(rc > 0) {
raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
} else
raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
}
}
/* If scanning for element, can continue */
if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) {
finished=1;
break;
}
/* Otherwise the choice of the next state can be made
* from the current element by the OBJ state
*/
state=RAPTOR_STATE_NODE_ELEMENT_LIST;
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES;
break;
case RAPTOR_STATE_NODE_ELEMENT_LIST:
/* Handling
* http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList
*
* Everything goes to nodeElement
*/
state=RAPTOR_STATE_NODE_ELEMENT;
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
break;
case RAPTOR_STATE_DESCRIPTION:
case RAPTOR_STATE_NODE_ELEMENT:
case RAPTOR_STATE_PARSETYPE_RESOURCE:
case RAPTOR_STATE_PARSETYPE_COLLECTION:
/* Handling <rdf:Description> or other node element
* http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement
*
* or a property element acting as a node element for
* rdf:parseType="Resource"
* http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt
* or rdf:parseType="Collection" (and daml:Collection)
* http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt
*
* Only create a bag if bagID given
*/
if(!raptor_xml_element_get_name(xml_element)->uri) {
/* We cannot handle this */
raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.",
raptor_xml_element_get_name(xml_element)->local_name);
raptor_rdfxml_update_document_locator(rdf_parser);
element->state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(element_in_rdf_ns &&
(rc = raptor_rdfxml_forbidden_nodeElement_name((const char*)el_name))) {
if(rc > 0) {
raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a node element.", el_name);
state=RAPTOR_STATE_SKIPPING;
element->state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
} else
raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
}
if(element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
element->content_type !=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
element->parent &&
(element->parent->state == RAPTOR_STATE_PROPERTYELT ||
element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) &&
element->parent->xml_element->content_element_seen > 1) {
raptor_rdfxml_update_document_locator(rdf_parser);
raptor_parser_error(rdf_parser, "The enclosing property already has an object");
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(state == RAPTOR_STATE_NODE_ELEMENT ||
state == RAPTOR_STATE_DESCRIPTION ||
state == RAPTOR_STATE_PARSETYPE_COLLECTION) {
if(element_in_rdf_ns &&
raptor_uri_equals(raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_Description_URI(rdf_xml_parser)))
state=RAPTOR_STATE_DESCRIPTION;
else
state=RAPTOR_STATE_NODE_ELEMENT;
}
if((element->rdf_attr[RDF_ATTR_ID]!=NULL) +
(element->rdf_attr[RDF_ATTR_about]!=NULL) +
(element->rdf_attr[RDF_ATTR_nodeID]!=NULL)>1) {
raptor_rdfxml_update_document_locator(rdf_parser);
raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name);
}
if(element->rdf_attr[RDF_ATTR_ID]) {
element->subject.id=element->rdf_attr[RDF_ATTR_ID];
element->rdf_attr[RDF_ATTR_ID]=NULL;
element->subject.uri=raptor_new_uri_from_id(base_uri, element->subject.id);
if(!element->subject.uri)
goto oom;
element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
element->subject.uri_source=RAPTOR_URI_SOURCE_ID;
if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) {
raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->subject.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(raptor_rdfxml_record_ID(rdf_parser, element, element->subject.id)) {
raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->subject.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
} else if (element->rdf_attr[RDF_ATTR_about]) {
element->subject.uri=raptor_new_uri_relative_to_base(base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_about]);
RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_about]);
element->rdf_attr[RDF_ATTR_about]=NULL;
if(!element->subject.uri)
goto oom;
element->subject.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
element->subject.uri_source=RAPTOR_URI_SOURCE_URI;
} else if (element->rdf_attr[RDF_ATTR_nodeID]) {
element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
element->rdf_attr[RDF_ATTR_nodeID]=NULL;
if(!element->subject.id)
goto oom;
element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
element->subject.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
if(!raptor_valid_xml_ID(rdf_parser, element->subject.id)) {
raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->subject.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
} else if (element->parent &&
element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION &&
element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION &&
(element->parent->object.uri || element->parent->object.id)) {
/* copy from parent (property element), it has a URI for us */
raptor_copy_identifier(&element->subject, &element->parent->object);
} else {
element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
if(!element->subject.id)
goto oom;
element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED;
}
if(element->rdf_attr[RDF_ATTR_bagID]) {
if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
element->bag.id=element->rdf_attr[RDF_ATTR_bagID];
element->rdf_attr[RDF_ATTR_bagID]=NULL;
element->bag.uri=raptor_new_uri_from_id(base_uri, element->bag.id);
if(!element->bag.uri)
goto oom;
element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED;
if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) {
raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) {
raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
raptor_rdfxml_generate_statement(rdf_parser,
element->bag.uri,
element->bag.id,
element->bag.type,
element->bag.uri_source,
RAPTOR_RDF_type_URI(rdf_xml_parser),
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_URI,
0,
RAPTOR_RDF_Bag_URI(rdf_xml_parser),
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_NOT_URI,
NULL,
NULL,
NULL);
} else {
/* bagID forbidden */
raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
}
if(element->parent) {
/* In a rdf:parseType="Collection" the resources are appended
* to the list at the genid element->parent->tail_id
*/
if (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
const unsigned char * idList = raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
/* <idList> rdf:type rdf:List */
raptor_uri *collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_List_URI(rdf_xml_parser) : RAPTOR_RDF_List_URI(rdf_xml_parser);
if(!idList)
goto oom;
if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ||
rdf_parser->features[RAPTOR_FEATURE_ALLOW_RDF_TYPE_RDF_LIST])
raptor_rdfxml_generate_statement(rdf_parser,
NULL,
idList,
RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
RAPTOR_URI_SOURCE_ID,
RAPTOR_RDF_type_URI(rdf_xml_parser),
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_URI,
0,
collection_uri,
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_URI,
NULL,
NULL,
element);
collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_xml_parser);
/* <idList> rdf:first <element->uri> */
raptor_rdfxml_generate_statement(rdf_parser,
NULL,
idList,
RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
RAPTOR_URI_SOURCE_ID,
collection_uri,
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_URI,
0,
element->subject.uri,
element->subject.id,
element->subject.type,
element->subject.uri_source,
NULL,
NULL,
NULL);
/* If there is no rdf:parseType="Collection" */
if (!element->parent->tail_id) {
int len;
unsigned char *new_id;
/* Free any existing object URI still around
* I suspect this can never happen.
*/
if(element->parent->object.uri)
raptor_free_uri(element->parent->object.uri);
len=strlen((char*)idList);
new_id=(unsigned char*)RAPTOR_MALLOC(cstring, len+1);
if(!len) {
if(new_id)
RAPTOR_FREE(cstring, new_id);
return;
}
strncpy((char*)new_id, (char*)idList, len+1);
element->parent->object.id=new_id;
element->parent->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
element->parent->object.uri_source=RAPTOR_URI_SOURCE_ID;
} else {
collection_uri=(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
/* _:tail_id rdf:rest _:listRest */
raptor_rdfxml_generate_statement(rdf_parser,
NULL,
element->parent->tail_id,
RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
RAPTOR_URI_SOURCE_ID,
collection_uri,
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_URI,
0,
NULL,
idList,
RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
RAPTOR_URI_SOURCE_ID,
NULL,
NULL,
NULL);
}
/* update new tail */
if(element->parent->tail_id)
RAPTOR_FREE(cstring, (char*)element->parent->tail_id);
element->parent->tail_id=idList;
} else if(element->parent->state != RAPTOR_STATE_UNKNOWN &&
element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) {
/* If there is a parent element (property) containing this
* element (node) and it has no object, set it from this subject
*/
if(element->parent->object.uri) {
raptor_rdfxml_update_document_locator(rdf_parser);
raptor_parser_error(rdf_parser, "Tried to set multiple objects of a statement");
} else {
/* Store URI of this node in our parent as the property object */
raptor_copy_identifier(&element->parent->object, &element->subject);
element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
}
}
}
/* If this is a node element, generate the rdf:type statement
* from this node
*/
if(state == RAPTOR_STATE_NODE_ELEMENT)
raptor_rdfxml_generate_statement(rdf_parser,
element->subject.uri,
element->subject.id,
element->subject.type,
element->subject.uri_source,
RAPTOR_RDF_type_URI(rdf_xml_parser),
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
RAPTOR_URI_SOURCE_URI,
0,
raptor_xml_element_get_name(xml_element)->uri,
NULL,
RAPTOR_IDENTIFIER_TYPE_RESOURCE,
element->object.uri_source,
NULL,
&element->reified,
element);
raptor_rdfxml_process_property_attributes(rdf_parser, element, element, NULL);
/* for both productions now need some more content or
* property elements before can do any more work.
*/
element->child_state=RAPTOR_STATE_PROPERTYELT;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
finished=1;
break;
case RAPTOR_STATE_PARSETYPE_OTHER:
/* FALLTHROUGH */
case RAPTOR_STATE_PARSETYPE_LITERAL:
raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element);
element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL;
element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
finished=1;
break;
/* Handle all the detail of the various options of property element
* http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
*
* All the attributes must be scanned here to see what additional
* property element work is needed. No triples are generated
* until the end of this element, until it is clear if the
* element was empty.
*/
case RAPTOR_STATE_MEMBER_PROPERTYELT:
case RAPTOR_STATE_PROPERTYELT:
if(!raptor_xml_element_get_name(xml_element)->uri) {
raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.",
raptor_xml_element_get_name(element->parent->xml_element)->local_name);
raptor_rdfxml_update_document_locator(rdf_parser);
element->state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
/* Handling rdf:li as a property, noting special processing */
if(element_in_rdf_ns &&
raptor_uri_equals(raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) {
state=RAPTOR_STATE_MEMBER_PROPERTYELT;
}
if(element_in_rdf_ns &&
(rc = raptor_rdfxml_forbidden_propertyElement_name((const char*)el_name))) {
if(rc > 0) {
raptor_parser_error(rdf_parser, "rdf:%s is forbidden as a property element.", el_name);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
} else
raptor_parser_warning(rdf_parser, "rdf:%s is an unknown RDF namespaced element.", el_name);
}
/* rdf:ID on a property element - reify a statement.
* Allowed on all property element forms
*/
if(element->rdf_attr[RDF_ATTR_ID]) {
element->reified.id=element->rdf_attr[RDF_ATTR_ID];
element->rdf_attr[RDF_ATTR_ID]=NULL;
element->reified.uri=raptor_new_uri_from_id(base_uri, element->reified.id);
if(!element->reified.uri)
goto oom;
element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED;
if(!raptor_valid_xml_ID(rdf_parser, element->reified.id)) {
raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", element->reified.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified.id)) {
raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", element->reified.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
}
/* rdf:datatype on a property element.
* Only allowed for
* http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt
*/
if (element->rdf_attr[RDF_ATTR_datatype]) {
element->object_literal_datatype=raptor_new_uri_relative_to_base(base_uri, (const unsigned char*)element->rdf_attr[RDF_ATTR_datatype]);
RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_datatype]);
element->rdf_attr[RDF_ATTR_datatype]=NULL;
if(!element->object_literal_datatype)
goto oom;
}
if(element->rdf_attr[RDF_ATTR_bagID]) {
if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
if(element->rdf_attr[RDF_ATTR_resource] ||
element->rdf_attr[RDF_ATTR_parseType]) {
raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name);
/* prevent this being used later either */
element->rdf_attr[RDF_ATTR_bagID]=NULL;
} else {
element->bag.id=element->rdf_attr[RDF_ATTR_bagID];
element->rdf_attr[RDF_ATTR_bagID]=NULL;
element->bag.uri=raptor_new_uri_from_id(base_uri, element->bag.id);
if(!element->bag.uri)
goto oom;
element->bag.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
element->bag.uri_source=RAPTOR_URI_SOURCE_GENERATED;
if(!raptor_valid_xml_ID(rdf_parser, element->bag.id)) {
raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", element->bag.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(raptor_rdfxml_record_ID(rdf_parser, element, element->bag.id)) {
raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", element->bag.id);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated.");
}
} else {
/* bagID forbidden */
raptor_parser_error(rdf_parser, "rdf:bagID is forbidden.");
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
} /* if rdf:bagID on property element */
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
if (element->rdf_attr[RDF_ATTR_parseType]) {
const unsigned char *parse_type=element->rdf_attr[RDF_ATTR_parseType];
int i;
int is_parseType_Literal=0;
if(raptor_rdfxml_element_has_property_attributes(element)) {
raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
/* Check for bad combinations of things with parseType */
for(i=0; i<= RDF_ATTR_LAST; i++)
if(element->rdf_attr[i] && i != RDF_ATTR_parseType) {
raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", rdf_syntax_terms_info[i].name, parse_type);
state=RAPTOR_STATE_SKIPPING;
element->child_state=RAPTOR_STATE_SKIPPING;
finished=1;
break;
}
if(!strcmp((char*)parse_type, "Literal"))
is_parseType_Literal=1;
else if (!strcmp((char*)parse_type, "Resource")) {
state=RAPTOR_STATE_PARSETYPE_RESOURCE;
element->child_state=RAPTOR_STATE_PROPERTYELT;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
/* create a node for the subject of the contained properties */
element->subject.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
if(!element->subject.id)
goto oom;
element->subject.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
element->subject.uri_source=RAPTOR_URI_SOURCE_GENERATED;
} else if(!strcmp((char*)parse_type, "Collection")) {
/* An rdf:parseType="Collection" appears as a single node */
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION;
} else {
if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_OTHER_PARSETYPES] &&
!raptor_strcasecmp((char*)parse_type, "daml:collection")) {
/* A DAML collection appears as a single node */
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
element->child_state=RAPTOR_STATE_PARSETYPE_COLLECTION;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION;
} else {
if(rdf_parser->features[RAPTOR_FEATURE_WARN_OTHER_PARSETYPES]) {
raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type);
}
is_parseType_Literal=1;
}
}
if(is_parseType_Literal) {
/* rdf:parseType="Literal" - explicitly or default
* if the parseType value is not recognised
*/
const raptor_uri_handler *uri_handler;
void *uri_context;
raptor_uri_get_handler(&uri_handler, &uri_context);
rdf_xml_parser->xml_content=NULL;
rdf_xml_parser->xml_content_length=0;
rdf_xml_parser->iostream=raptor_new_iostream_to_string(&rdf_xml_parser->xml_content, &rdf_xml_parser->xml_content_length, raptor_alloc_memory);
if(!rdf_xml_parser->iostream)
goto oom;
rdf_xml_parser->xml_writer=raptor_new_xml_writer(NULL,
uri_handler, uri_context,
rdf_xml_parser->iostream,
(raptor_simple_message_handler)raptor_parser_simple_error, rdf_parser,
1);
if(!rdf_xml_parser->xml_writer)
goto oom;
raptor_xml_writer_set_feature(rdf_xml_parser->xml_writer,
RAPTOR_FEATURE_WRITER_XML_DECLARATION, 0);
element->child_state=RAPTOR_STATE_PARSETYPE_LITERAL;
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
element->child_content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
}
} else {
/* Can only be the empty property element case
* http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt
*/
/* The presence of the rdf:resource or rdf:nodeID
* attributes is checked at element close time
*/
/*
* Assign reified URI here so we don't reify property attributes
* using this id
*/
if(element->reified.id && !element->reified.uri) {
element->reified.uri=raptor_new_uri_from_id(base_uri, element->reified.id);
if(!element->reified.uri)
goto oom;
element->reified.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
element->reified.uri_source=RAPTOR_URI_SOURCE_GENERATED;
}
if(element->rdf_attr[RDF_ATTR_resource] ||
element->rdf_attr[RDF_ATTR_nodeID]) {
/* Done - wait for end of this element to end in order to
* check the element was empty as expected */
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
} else {
/* Otherwise process content in obj (value) state */
element->child_state=RAPTOR_STATE_NODE_ELEMENT_LIST;
element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT;
}
}
finished=1;
break;
case RAPTOR_STATE_INVALID:
default:
raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_start_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
finished=1;
} /* end switch */
if(state != element->state) {
element->state=state;
#ifdef RAPTOR_DEBUG_VERBOSE
RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
#endif
}
} /* end while */
#ifdef RAPTOR_DEBUG_VERBOSE
RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
#endif
return;
oom:
raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
element->state=RAPTOR_STATE_SKIPPING;
}
/**
 * raptor_rdfxml_end_element_grammar:
 * @rdf_parser: Raptor parser object
 * @element: RDF/XML element to process
 *
 * Apply the RDF/XML grammar for the end of @element.
 *
 * Dispatches on @element->state:
 * - skipping / unknown / parseType collection states do nothing;
 * - the node element list state moves to the unknown state, warning
 *   when the ending element is not rdf:RDF and the parser is not scanning;
 * - node element and parseType="Resource" states emit the statement that
 *   links the parent's subject to this element's subject;
 * - XML literal states pass the end tag to the XML writer;
 * - property element states work out the final content type of the
 *   element (resource, literal or XML literal), terminate any
 *   rdf:parseType="Collection" list and emit the property statement.
 *
 * Any change of state is stored back into @element->state.  When an
 * allocation fails a fatal error is reported and @element->state is set to
 * the skipping state.
 **/
static void
raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser,
                                  raptor_rdfxml_element *element)
{
  raptor_state state;
  int finished;
  raptor_xml_element* xml_element=element->xml_element;
  const unsigned char *el_name=raptor_xml_element_get_name(xml_element)->local_name;
  int element_in_rdf_ns=(raptor_xml_element_get_name(xml_element)->nspace &&
                         raptor_xml_element_get_name(xml_element)->nspace->is_rdf_ms);
  raptor_rdfxml_parser *rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;

  state=element->state;
#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state));
#endif

  finished= 0;
  while(!finished) {
    switch(state) {
      case RAPTOR_STATE_SKIPPING:
        finished=1;
        break;

      case RAPTOR_STATE_UNKNOWN:
        finished=1;
        break;

      case RAPTOR_STATE_NODE_ELEMENT_LIST:
        if(element_in_rdf_ns &&
           raptor_uri_equals(raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_RDF_URI(rdf_xml_parser))) {
          /* end of RDF - boo hoo */
          state=RAPTOR_STATE_UNKNOWN;
          finished=1;
          break;
        }

        /* When scanning, another element ending is outside the RDF
         * world so this can happen without further work
         */
        if(rdf_parser->features[RAPTOR_FEATURE_SCANNING]) {
          state=RAPTOR_STATE_UNKNOWN;
          finished=1;
          break;
        }

        /* otherwise found some junk after RDF content in an RDF-only
         * document (probably never get here since this would be
         * a mismatched XML tag and cause an error earlier)
         */
        raptor_rdfxml_update_document_locator(rdf_parser);
        raptor_parser_warning(rdf_parser, "Element '%s' ended, expected end of RDF element", el_name);
        state=RAPTOR_STATE_UNKNOWN;
        finished=1;
        break;

      case RAPTOR_STATE_DESCRIPTION:
      case RAPTOR_STATE_NODE_ELEMENT:
      case RAPTOR_STATE_PARSETYPE_RESOURCE:
        /* If there is a parent element containing this element and
         * the parent isn't a description, has an identifier,
         * create the statement between this node using parent property
         * (Need to check for identifier so that top-level typed nodes
         * don't get connected to <rdf:RDF> parent element)
         */
        if(state == RAPTOR_STATE_NODE_ELEMENT &&
           element->parent &&
           (element->parent->subject.uri || element->parent->subject.id))
          raptor_rdfxml_generate_statement(rdf_parser,
                                           element->parent->subject.uri,
                                           element->parent->subject.id,
                                           element->parent->subject.type,
                                           element->parent->subject.uri_source,
                                           raptor_xml_element_get_name(element->parent->xml_element)->uri,
                                           NULL,
                                           RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                           RAPTOR_URI_SOURCE_ELEMENT,
                                           0,
                                           element->subject.uri,
                                           element->subject.id,
                                           element->subject.type,
                                           element->subject.uri_source,
                                           NULL,
                                           NULL,
                                           element);
        else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE &&
                element->parent &&
                (element->parent->subject.uri || element->parent->subject.id)) {
          /* Handle rdf:li as the rdf:parseType="resource" property */
          if(element_in_rdf_ns &&
             raptor_uri_equals(raptor_xml_element_get_name(xml_element)->uri, RAPTOR_RDF_li_URI(rdf_xml_parser))) {
            /* rdf:li becomes the next ordinal predicate of the parent */
            element->parent->last_ordinal++;
            raptor_rdfxml_generate_statement(rdf_parser,
                                             element->parent->subject.uri,
                                             element->parent->subject.id,
                                             element->parent->subject.type,
                                             element->parent->subject.uri_source,
                                             NULL,
                                             NULL,
                                             RAPTOR_IDENTIFIER_TYPE_ORDINAL,
                                             RAPTOR_URI_SOURCE_NOT_URI,
                                             element->parent->last_ordinal,
                                             element->subject.uri,
                                             element->subject.id,
                                             element->subject.type,
                                             element->subject.uri_source,
                                             NULL,
                                             &element->reified,
                                             element->parent);
          } else {
            raptor_rdfxml_generate_statement(rdf_parser,
                                             element->parent->subject.uri,
                                             element->parent->subject.id,
                                             element->parent->subject.type,
                                             element->parent->subject.uri_source,
                                             raptor_xml_element_get_name(xml_element)->uri,
                                             NULL,
                                             RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                             RAPTOR_URI_SOURCE_ELEMENT,
                                             0,
                                             element->subject.uri,
                                             element->subject.id,
                                             element->subject.type,
                                             element->subject.uri_source,
                                             NULL,
                                             &element->reified,
                                             element->parent);
          }
        }
        finished=1;
        break;

      case RAPTOR_STATE_PARSETYPE_COLLECTION:
        finished=1;
        break;

      case RAPTOR_STATE_PARSETYPE_OTHER:
        /* FALLTHROUGH */

      case RAPTOR_STATE_PARSETYPE_LITERAL:
        element->parent->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL;
        raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element);
        finished=1;
        break;

      case RAPTOR_STATE_PROPERTYELT:
      case RAPTOR_STATE_MEMBER_PROPERTYELT:
        /* A property element
         *   http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt
         *
         * Literal content part is handled here.
         * The element content is handled in the internal states
         * Empty content is checked here.
         */
        if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
          if(xml_element->content_cdata_seen)
            element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
          else if (xml_element->content_element_seen)
            element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES;
          else { /* Empty Literal */
            element->object.type= RAPTOR_IDENTIFIER_TYPE_LITERAL;
            element->content_type= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
          }
        }

        /* Handle terminating a rdf:parseType="Collection" list */
        if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION ||
           element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) {
          raptor_uri* nil_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_nil_URI(rdf_xml_parser) : RAPTOR_RDF_nil_URI(rdf_xml_parser);
          if (!element->tail_id) {
            /* If No List: set object of statement to rdf:nil */
            element->object.uri= raptor_uri_copy(nil_uri);
            element->object.id= NULL;
            element->object.type= RAPTOR_IDENTIFIER_TYPE_RESOURCE;
            element->object.uri_source= RAPTOR_URI_SOURCE_URI;
          } else {
            raptor_uri* rest_uri=(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_xml_parser);
            /* terminate the list */
            raptor_rdfxml_generate_statement(rdf_parser,
                                             NULL,
                                             element->tail_id,
                                             RAPTOR_IDENTIFIER_TYPE_ANONYMOUS,
                                             RAPTOR_URI_SOURCE_ID,
                                             rest_uri,
                                             NULL,
                                             RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                             RAPTOR_URI_SOURCE_URI,
                                             0,
                                             nil_uri,
                                             NULL,
                                             RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                             RAPTOR_URI_SOURCE_URI,
                                             NULL,
                                             NULL,
                                             NULL);
          }
        } /* end rdf:parseType="Collection" termination */

#ifdef RAPTOR_DEBUG_VERBOSE
        RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
#endif

        /* NOTE: a 'break' inside the cases below leaves only this inner
         * switch; control then reaches the finished=1 after it.
         */
        switch(element->content_type) {
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE:
            if(raptor_rdfxml_element_has_property_attributes(element) &&
               element->child_state == RAPTOR_STATE_DESCRIPTION) {
              raptor_parser_error(rdf_parser, "Property element '%s' has both property attributes and a node element content", el_name);
              state=RAPTOR_STATE_SKIPPING;
              element->child_state=RAPTOR_STATE_SKIPPING;
              finished=1;
              break;
            }

            if(element->object.type == RAPTOR_IDENTIFIER_TYPE_UNKNOWN) {
              /* No object yet: take it from rdf:resource, then rdf:nodeID,
               * otherwise generate a blank node identifier
               */
              if(element->rdf_attr[RDF_ATTR_resource]) {
                element->object.uri=raptor_new_uri_relative_to_base(raptor_rdfxml_inscope_base_uri(rdf_parser),
                                                                    (const unsigned char*)element->rdf_attr[RDF_ATTR_resource]);
                RAPTOR_FREE(cstring, (void*)element->rdf_attr[RDF_ATTR_resource]);
                element->rdf_attr[RDF_ATTR_resource]=NULL;
                if(!element->object.uri)
                  goto oom;
                element->object.type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
                element->object.uri_source=RAPTOR_URI_SOURCE_URI;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
              } else if(element->rdf_attr[RDF_ATTR_nodeID]) {
                element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, (unsigned char*)element->rdf_attr[RDF_ATTR_nodeID]);
                element->rdf_attr[RDF_ATTR_nodeID]=NULL;
                if(!element->object.id)
                  goto oom;
                element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
                element->object.uri_source=RAPTOR_URI_SOURCE_BLANK_ID;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
                if(!raptor_valid_xml_ID(rdf_parser, element->object.id)) {
                  raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", element->object.id);
                  state=RAPTOR_STATE_SKIPPING;
                  element->child_state=RAPTOR_STATE_SKIPPING;
                  finished=1;
                  break;
                }
              } else {
                element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
                if(!element->object.id)
                  goto oom;
                element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
                element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
              }

              raptor_rdfxml_process_property_attributes(rdf_parser, element,
                                                        element->parent,
                                                        &element->object);
            }

            /* We know object is a resource, so delete any insignificant
             * whitespace so that FALLTHROUGH code below finds the object.
             */
            if(xml_element->content_cdata_length) {
              raptor_free_stringbuffer(xml_element->content_cdata_sb);
              xml_element->content_cdata_sb=NULL;
              xml_element->content_cdata_length=0;
            }

            /* FALLTHROUGH */

          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL:
            if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
              if(rdf_parser->features[RAPTOR_FEATURE_ALLOW_BAGID]) {
                /* Only an empty literal can have a rdf:bagID */
                if(element->bag.uri || element->bag.id) {
                  if(xml_element->content_cdata_length > 0) {
                    raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on a literal property element '%s'.", el_name);
                    /* prevent this being used later either */
                    element->rdf_attr[RDF_ATTR_bagID]=NULL;
                  } else
                    raptor_rdfxml_generate_statement(rdf_parser,
                                                     element->bag.uri,
                                                     element->bag.id,
                                                     element->bag.type,
                                                     element->bag.uri_source,
                                                     RAPTOR_RDF_type_URI(rdf_xml_parser),
                                                     NULL,
                                                     RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                                     RAPTOR_URI_SOURCE_URI,
                                                     0,
                                                     RAPTOR_RDF_Bag_URI(rdf_xml_parser),
                                                     NULL,
                                                     RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                                     RAPTOR_URI_SOURCE_NOT_URI,
                                                     NULL,
                                                     NULL,
                                                     NULL);
                }
              } /* if rdf:bagID */

              /* If there is empty literal content with properties
               * generate a node to hang properties off
               */
              if(raptor_rdfxml_element_has_property_attributes(element) &&
                 xml_element->content_cdata_length > 0) {
                raptor_parser_error(rdf_parser, "Literal property element '%s' has property attributes", el_name);
                state=RAPTOR_STATE_SKIPPING;
                element->child_state=RAPTOR_STATE_SKIPPING;
                finished=1;
                break;
              }

              if(element->object.type == RAPTOR_IDENTIFIER_TYPE_LITERAL &&
                 raptor_rdfxml_element_has_property_attributes(element) &&
                 !element->object.uri) {
                element->object.id=raptor_parser_internal_generate_id(rdf_parser, RAPTOR_GENID_TYPE_BNODEID, NULL);
                if(!element->object.id)
                  goto oom;
                element->object.type=RAPTOR_IDENTIFIER_TYPE_ANONYMOUS;
                element->object.uri_source=RAPTOR_URI_SOURCE_GENERATED;
                element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE;
              }

              raptor_rdfxml_process_property_attributes(rdf_parser, element,
                                                        element,
                                                        &element->object);
            }

            /* just be friendly to older compilers and don't declare
             * variables in the middle of a block
             */
            if(1) {
              raptor_uri *predicate_uri=NULL;
              raptor_identifier_type predicate_type;
              int predicate_ordinal=0;
              raptor_uri *object_uri;
              raptor_identifier_type object_type;
              raptor_uri *literal_datatype=NULL;
              const unsigned char* empty_literal=(const unsigned char*)"";

              if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
                /* rdf:li: predicate is the next ordinal of the parent */
                element->parent->last_ordinal++;
                predicate_ordinal=element->parent->last_ordinal;
                predicate_type=RAPTOR_IDENTIFIER_TYPE_ORDINAL;
              } else {
                predicate_uri=raptor_xml_element_get_name(xml_element)->uri;
                predicate_type=RAPTOR_IDENTIFIER_TYPE_RESOURCE;
              }

              if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) {
                unsigned char* literal;

                object_type=RAPTOR_IDENTIFIER_TYPE_LITERAL;
                literal=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
                literal_datatype=element->object_literal_datatype;

                /* NFC is only checked for literals without a datatype */
                if(!literal_datatype && literal &&
                   !raptor_utf8_is_nfc(literal, xml_element->content_cdata_length)) {
                  const char *message="Property element '%s' has a string not in Unicode Normal Form C: %s";
                  raptor_rdfxml_update_document_locator(rdf_parser);
                  if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
                    raptor_parser_error(rdf_parser, message, el_name, literal);
                  else
                    raptor_parser_warning(rdf_parser, message, el_name, literal);
                }

                if(!literal)
                  /* empty literal */
                  literal=(unsigned char*)empty_literal;

                /* the literal string travels in the object URI argument */
                object_uri=(raptor_uri*)literal;
              } else {
                object_type=element->object.type;
                object_uri=element->object.uri;
              }

              raptor_rdfxml_generate_statement(rdf_parser,
                                               element->parent->subject.uri,
                                               element->parent->subject.id,
                                               element->parent->subject.type,
                                               RAPTOR_URI_SOURCE_ELEMENT,
                                               predicate_uri,
                                               NULL,
                                               predicate_type,
                                               RAPTOR_URI_SOURCE_NOT_URI,
                                               predicate_ordinal,
                                               object_uri,
                                               element->object.id,
                                               object_type,
                                               element->object.uri_source,
                                               literal_datatype,
                                               &element->reified,
                                               element->parent);
            }
            break;

          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL:
            {
              unsigned char *buffer;
              unsigned int length;

              if(rdf_xml_parser->xml_writer) {
                /* Content was serialised through the XML writer into
                 * xml_content; flush it and release the iostream first
                 */
                raptor_xml_writer_flush(rdf_xml_parser->xml_writer);
                raptor_free_iostream(rdf_xml_parser->iostream);
                rdf_xml_parser->iostream=NULL;
                buffer=(unsigned char*)rdf_xml_parser->xml_content;
                length=rdf_xml_parser->xml_content_length;
              } else {
                buffer=raptor_stringbuffer_as_string(xml_element->content_cdata_sb);
                length=xml_element->content_cdata_length;
              }

              if(!raptor_utf8_is_nfc(buffer, length)) {
                const char *message="Property element '%s' has XML literal content not in Unicode Normal Form C: %s";
                raptor_rdfxml_update_document_locator(rdf_parser);
                if(rdf_parser->features[RAPTOR_FEATURE_NON_NFC_FATAL])
                  raptor_parser_error(rdf_parser, message, el_name, buffer);
                else
                  raptor_parser_warning(rdf_parser, message, el_name, buffer);
              }

              if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) {
                element->parent->last_ordinal++;
                raptor_rdfxml_generate_statement(rdf_parser,
                                                 element->parent->subject.uri,
                                                 element->parent->subject.id,
                                                 element->parent->subject.type,
                                                 element->parent->subject.uri_source,
                                                 NULL,
                                                 NULL,
                                                 RAPTOR_IDENTIFIER_TYPE_ORDINAL,
                                                 RAPTOR_URI_SOURCE_NOT_URI,
                                                 element->parent->last_ordinal,
                                                 (raptor_uri*)buffer,
                                                 NULL,
                                                 RAPTOR_IDENTIFIER_TYPE_LITERAL,
                                                 RAPTOR_URI_SOURCE_NOT_URI,
                                                 RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
                                                 &element->reified,
                                                 element->parent);
              } else {
                raptor_rdfxml_generate_statement(rdf_parser,
                                                 element->parent->subject.uri,
                                                 element->parent->subject.id,
                                                 element->parent->subject.type,
                                                 element->parent->subject.uri_source,
                                                 raptor_xml_element_get_name(xml_element)->uri,
                                                 NULL,
                                                 RAPTOR_IDENTIFIER_TYPE_RESOURCE,
                                                 RAPTOR_URI_SOURCE_ELEMENT,
                                                 0,
                                                 (raptor_uri*)buffer,
                                                 NULL,
                                                 RAPTOR_IDENTIFIER_TYPE_LITERAL,
                                                 RAPTOR_URI_SOURCE_NOT_URI,
                                                 RAPTOR_RDF_XMLLiteral_URI(rdf_xml_parser),
                                                 &element->reified,
                                                 element->parent);
              }

              /* Finish the xml writer iostream for parseType="Literal" */
              if(rdf_xml_parser->xml_writer) {
                raptor_free_xml_writer(rdf_xml_parser->xml_writer);
                /* Clear the pointer once freed: the xml_writer tests in
                 * this function treat non-NULL as "a writer is live", so
                 * a stale value would be flushed and freed a second time.
                 */
                rdf_xml_parser->xml_writer=NULL;
                RAPTOR_FREE(cstring, rdf_xml_parser->xml_content);
                rdf_xml_parser->xml_content=NULL;
                rdf_xml_parser->xml_content_length=0;
              }
            }
            break;

          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN:
          case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST:
          default:
            raptor_parser_fatal_error(rdf_parser, "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%d)", __func__, raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
        } /* end switch */

        finished=1;
        break;

      case RAPTOR_STATE_INVALID:
      default:
        raptor_parser_fatal_error(rdf_parser, "raptor_rdfxml_end_element_grammar: Unexpected parser state %d - %s", state, raptor_rdfxml_state_as_string(state));
        finished=1;
    } /* end switch */

    if(state != element->state) {
      element->state=state;
#ifdef RAPTOR_DEBUG_VERBOSE
      RAPTOR_DEBUG3("Moved to state %d - %s\n", state, raptor_rdfxml_state_as_string(state));
#endif
    }
  } /* end while */

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
#endif

  return;

  oom:
  raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping");
  element->state=RAPTOR_STATE_SKIPPING;
}
/**
 * raptor_rdfxml_cdata_grammar:
 * @rdf_parser: Raptor parser object
 * @s: character data (not required to be NUL terminated; @len bytes are used)
 * @len: number of bytes at @s
 * @is_cdata: only used in the RAPTOR_DEBUG_CDATA trace output
 *
 * Apply the RDF/XML grammar to character data inside the current element.
 *
 * Does nothing if the parser has failed, there is no current element or
 * the current element's child state is skipping.  Otherwise the data is
 * either passed to the XML writer (XML literal content) or appended to
 * the element's cdata string buffer; whitespace-only data is dropped for
 * content types where whitespace is not significant.
 **/
static void
raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser,
                            const unsigned char *s, int len,
                            int is_cdata)
{
  raptor_rdfxml_parser* rdf_xml_parser;
  raptor_rdfxml_element* element;
  raptor_xml_element* xml_element;
  raptor_state state;
  int all_whitespace=1;
  int i;

  rdf_xml_parser=(raptor_rdfxml_parser*)rdf_parser->context;

  if(rdf_parser->failed)
    return;

#ifdef RAPTOR_DEBUG_CDATA
  RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata);
  (void)fwrite(s, 1, len, stderr);
  fprintf(stderr, "' (%d bytes)\n", len);
#endif

  /* Find out whether the data is entirely whitespace */
  for(i=0; i<len; i++)
    if(!isspace(s[i])) {
      all_whitespace=0;
      break;
    }

  element=rdf_xml_parser->current_element;

  /* this file is very broke - probably not XML, whatever */
  if(!element)
    return;

  xml_element=element->xml_element;

  raptor_rdfxml_update_document_locator(rdf_parser);

  /* cdata never changes the parser state
   * and the containing element state always determines what to do.
   * Use the child_state first if there is one, since that applies
   */
  state=element->child_state;

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state));
#endif

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG3("Content type %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
#endif

  if(state == RAPTOR_STATE_SKIPPING)
    return;

  if(state == RAPTOR_STATE_UNKNOWN) {
    /* Ignore all cdata if still looking for RDF */
    if(rdf_parser->features[RAPTOR_FEATURE_SCANNING])
      return;

    /* Ignore all whitespace cdata before first element */
    if(all_whitespace)
      return;

    /* This probably will never happen since that would make the
     * XML not be well-formed
     */
    raptor_parser_warning(rdf_parser, "Character data before RDF element.");
  }

  if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) {
    /* If found non-whitespace content, move to literal content */
    if(!all_whitespace)
      element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
  }

  if(!rdf_content_type_info[element->child_content_type].whitespace_significant) {
    /* Whitespace is ignored except for literal or preserved content types */
    if(all_whitespace) {
#ifdef RAPTOR_DEBUG_CDATA
      /* element->parent is NULL for an element with no enclosing element,
       * so fall back to the element's own name
       */
      RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", raptor_xml_element_get_name(element->parent ? element->parent->xml_element : xml_element)->local_name);
#endif
      return;
    }

    if(xml_element->content_cdata_seen && xml_element->content_element_seen) {
      /* The warning names the parent element as before; when there is
       * no parent, name this element rather than dereference NULL.
       */
      raptor_xml_element* named_element=element->parent ? element->parent->xml_element : xml_element;

      /* Uh oh - mixed content, this element has elements too */
      raptor_parser_warning(rdf_parser, "element '%s' has mixed content.",
                            raptor_xml_element_get_name(named_element)->local_name);
    }
  }

  if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) {
    element->content_type=RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL;
#ifdef RAPTOR_DEBUG_VERBOSE
    RAPTOR_DEBUG3("Content type changed to %s (%d)\n", raptor_rdfxml_element_content_type_as_string(element->content_type), element->content_type);
#endif
  }

  if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL)
    /* XML literal content is serialised by the XML writer */
    raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len);
  else {
    raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb,
                                              s, len, 1);
    element->content_cdata_all_whitespace &= all_whitespace;

    /* adjust stored length */
    xml_element->content_cdata_length += len;
  }

#ifdef RAPTOR_DEBUG_CDATA
  RAPTOR_DEBUG3("Content cdata now: %d bytes\n", xml_element->content_cdata_length);
#endif

#ifdef RAPTOR_DEBUG_VERBOSE
  RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state));
#endif
}
/**
 * raptor_rdfxml_inscope_base_uri:
 * @rdf_parser: Raptor parser object
 *
 * Get the base URI that is currently in scope.
 *
 * Asks the SAX2 layer for its in-scope base URI and, when it has none,
 * uses the parser's own base URI instead.
 *
 * Return value: the in-scope base URI object, or the parser base URI
 * (which may itself be NULL) if the SAX2 layer returned none.
 **/
static raptor_uri*
raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser)
{
  raptor_rdfxml_parser *context=(raptor_rdfxml_parser*)rdf_parser->context;
  raptor_uri* sax2_base=raptor_sax2_inscope_base_uri(context->sax2);

  return sax2_base ? sax2_base : rdf_parser->base_uri;
}
/**
 * raptor_rdfxml_record_ID:
 * @rdf_parser: Raptor parser object
 * @element: Current element (not used by this function)
 * @id: ID string, NUL-terminated
 *
 * Record an rdf:ID / rdf:bagID value against the in-scope base URI and
 * report whether it had already been recorded.
 *
 * When the RAPTOR_FEATURE_CHECK_RDF_ID feature is off nothing is
 * recorded and 0 is returned.  Otherwise the ID and the in-scope base
 * URI are passed to raptor_id_set_add() on the parser's ID set.
 *
 * Return value: 0 if checking is disabled or raptor_id_set_add()
 * returned 0; 1 for any other raptor_id_set_add() result (ID already
 * seen, or failure).
 **/
static int
raptor_rdfxml_record_ID(raptor_parser *rdf_parser,
                        raptor_rdfxml_element *element,
                        const unsigned char *id)
{
  raptor_rdfxml_parser *rdf_xml_parser;
  raptor_uri *base_uri;
  size_t id_len;
  int rc;

  /* Test the feature first so the base URI lookup and strlen() are only
   * performed when the ID is really going to be recorded. */
  if(!rdf_parser->features[RAPTOR_FEATURE_CHECK_RDF_ID])
    return 0;

  rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
  base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser);
  id_len = strlen((const char*)id);

  rc = raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len);

  /* Collapse every non-zero result into a single "not newly added" flag */
  return (rc != 0);
}
/*
 * raptor_rdfxml_update_document_locator:
 * @rdf_parser: Raptor parser object
 *
 * Pass the parser's locator to raptor_sax2_update_document_locator()
 * together with this parser's SAX2 object.
 */
static void
raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser)
{
  raptor_rdfxml_parser *ctx;

  ctx = (raptor_rdfxml_parser*)rdf_parser->context;
  raptor_sax2_update_document_locator(ctx->sax2, &rdf_parser->locator);
}
/*
 * raptor_rdfxml_parse_finish_factory:
 * @factory: parser factory
 *
 * No-op.  Installed as the finish_factory handler of the RDF/XML parser
 * factory; this function performs no work.
 */
static void
raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory)
{
  /* empty by design */
}
/*
 * raptor_rdfxml_parser_register_factory:
 * @factory: parser factory to fill in
 *
 * Fill in @factory for the RDF/XML parser: context size, the
 * need-base-URI flag, the handler function pointers, and then the
 * "raptor" alias, the syntax URI and two MIME types.
 *
 * Return value: the number of alias / URI / MIME type registrations
 * that returned non-zero (0 when none did).
 */
static int
raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory)
{
  int failures = 0;

  /* Per-parser context and requirements */
  factory->need_base_uri  = 1;
  factory->context_length = sizeof(raptor_rdfxml_parser);

  /* Handlers */
  factory->init             = raptor_rdfxml_parse_init;
  factory->terminate        = raptor_rdfxml_parse_terminate;
  factory->start            = raptor_rdfxml_parse_start;
  factory->chunk            = raptor_rdfxml_parse_chunk;
  factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax;
  factory->finish_factory   = raptor_rdfxml_parse_finish_factory;

  /* Names by which this syntax can be found; count each failed call */
  if(raptor_parser_factory_add_alias(factory, "raptor") != 0)
    failures++;

  if(raptor_parser_factory_add_uri(factory,
       (const unsigned char*)"http://www.w3.org/TR/rdf-syntax-grammar") != 0)
    failures++;

  /* NOTE(review): the trailing integers look like MIME type preference
   * values - confirm against raptor_parser_factory_add_mime_type() */
  if(raptor_parser_factory_add_mime_type(factory, "application/rdf+xml", 10) != 0)
    failures++;

  if(raptor_parser_factory_add_mime_type(factory, "text/rdf", 6) != 0)
    failures++;

  return failures;
}
/*
 * raptor_init_parser_rdfxml:
 *
 * Register the RDF/XML parser under the name "rdfxml" with the label
 * "RDF/XML", using raptor_rdfxml_parser_register_factory() to fill in
 * the factory.
 *
 * Return value: the logical negation of what
 * raptor_parser_register_factory() returns, i.e. 0 when that result is
 * non-zero and 1 when it is zero.
 */
int
raptor_init_parser_rdfxml(void)
{
  if(raptor_parser_register_factory("rdfxml", "RDF/XML",
                                    raptor_rdfxml_parser_register_factory))
    return 0;

  return 1;
}
#if RAPTOR_DEBUG > 1
/*
 * raptor_rdfxml_parser_stats_print:
 * @rdf_xml_parser: RDF/XML parser context
 * @stream: FILE to write to
 *
 * Write the heading "rdf:ID set " to @stream and then let
 * raptor_id_set_stats_print() print the parser's ID set to the same
 * stream.  Only compiled when RAPTOR_DEBUG is greater than 1.
 */
void
raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser,
                                 FILE *stream)
{
  const char *heading = "rdf:ID set ";

  fputs(heading, stream);
  raptor_id_set_stats_print(rdf_xml_parser->id_set, stream);
}
#endif