/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * rdql_lexer.l - Rasqal RDQL lexer - making tokens for rdql grammar generator
 *
 * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
 * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/
 *
 * This package is Free Software and part of Redland http://librdf.org/
 *
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 *
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 *
 * See LICENSE.html or LICENSE.txt at the top of this package for the
 * complete terms and further detail along with the license texts for
 * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
 *
 * To generate the C files from this source, rather than use the
 * shipped rdql_lexer.c/.h needs a patched version of flex 2.5.31 such
 * as the one available in Debian GNU/Linux.   Details below
 * near the %option descriptions.
 *
 */


/* recognise 8-bits */
%option 8bit
%option warn nodefault

/* all symbols prefixed by this */
%option prefix="rdql_lexer_"

/* This is not needed, flex is invoked -ordql_lexer.c */
%option outfile="rdql_lexer.c"

/* Emit a C header file for prototypes
 * Only available in flex 2.5.13 or newer.
 * It was renamed to header-file in flex 2.5.19
 */
%option header-file="rdql_lexer.h"

/* Do not emit #include <unistd.h>
 * Only available in flex 2.5.7 or newer.
 * Broken in flex 2.5.31 without patches.
 */
%option nounistd

/* Never interactive */
/*  No isatty() check */
%option never-interactive

/* Batch scanner */
%option batch

/* Never use yyunput */
%option nounput

/* Supply our own alloc/realloc/free functions */
%option noyyalloc noyyrealloc noyyfree

/* Re-entrant scanner */
%option reentrant


  /* Exclusive start conditions:
   *   PATTERN - entered after a match operator; the next non-whitespace
   *             character starts a regex literal
   *   ID      - entered after '?'; a variable name must follow
   */
%x PATTERN ID

  /* definitions */

%{

/* NOTE: These headers are NOT included here. They are inserted by fix-flex
 * since otherwise it appears far too late in the generated C
 */

/*
#ifdef HAVE_CONFIG_H
#include <rasqal_config.h>
#endif

#ifdef WIN32
#include <win32_rasqal_config.h>
#endif
*/

#include <string.h>
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#ifdef HAVE_SETJMP_H
#include <setjmp.h>
#endif

#include <rasqal.h>
#include <rasqal_internal.h>

#include <rdql_parser.h>

#include <rdql_common.h>


static int rdql_skip_c_comment(rasqal_query *rq);

static unsigned char* rdql_copy_name(rasqal_query *rq, const unsigned char *text, size_t len);
static unsigned char* rdql_copy_qname(rasqal_query *rq, const unsigned char *text, size_t len);
static int rdql_copy_regex_token(rasqal_query *rq, YYSTYPE* lval, unsigned char delim);
static int rdql_copy_string_token(rasqal_query *rq, YYSTYPE* lval, const unsigned char *text, size_t len, int delim);

#ifdef RASQAL_DEBUG
const char * rdql_token_print(int token, YYSTYPE *lval);
#endif

int rdql_lexer_lex (YYSTYPE *rdql_parser_lval, yyscan_t yyscanner);
#define YY_DECL int rdql_lexer_lex (YYSTYPE *rdql_parser_lval, yyscan_t yyscanner)

#ifdef __cplusplus
#define INPUT_FN yyinput
#else
#define INPUT_FN input
#endif

/* Remove the re-fill function since it should never be called */
#define YY_INPUT(buf,result,max_size) { return YY_NULL; }

/* Missing rdql_lexer.c/h prototypes */
int rdql_lexer_get_column(yyscan_t yyscanner);
void rdql_lexer_set_column(int column_no , yyscan_t yyscanner);

static void rdql_lexer_cleanup(yyscan_t yyscanner);

#ifdef HAVE_SETJMP
static jmp_buf rdql_lexer_fatal_error_longjmp_env;

/* fatal error handler declaration */
#define YY_FATAL_ERROR(msg) do {                        \
    rdql_lexer_fatal_error(msg, yyscanner);             \
    longjmp(rdql_lexer_fatal_error_longjmp_env, 1);     \
} while(0)
#else
#define YY_FATAL_ERROR(msg) do {                        \
    rdql_lexer_fatal_error(msg, yyscanner);             \
    abort();                                            \
} while(0)
#endif

static void rdql_lexer_fatal_error(yyconst char *msg, yyscan_t yyscanner);

%}

LANGUAGETOKEN [A-Za-z][-A-Z_a-z0-9]*
/* See
 * http://www.w3.org/TR/xml11/#NT-NameStartChar and
 * http://www.w3.org/TR/xml11/#NT-NameChar
 */
PREFIX [A-Za-z\\\x80-\xff][-A-Z_a-z\\\x80-\xff0-9]*
NAME [A-Za-z_\\\x80-\xff][-A-Z_a-z\\\x80-\xff0-9]*
QNAME {PREFIX}:{NAME}*
/* The initial char conditions are to ensure this doesn't grab < or <= */
QUOTEDURI \<[A-Za-z][^>]+\>

%%
  /* rules */

%{
  /* Code here is placed at the top of rdql_lexer_lex() and runs on
   * every call before scanning starts.
   */
  int c;
  rasqal_query *rq=(rasqal_query*)yyextra;
  rasqal_rdql_query_engine *rqe=(rasqal_rdql_query_engine*)rq->context;

#ifdef HAVE_SETJMP
  if(setjmp(rdql_lexer_fatal_error_longjmp_env))
    return 1;
#endif
%}

"//"[^\r\n]*(\r\n|\r|\n)  { /* C++ comment */
                  rqe->lineno++;
                }

"/*"            { /* C comment: body is consumed by rdql_skip_c_comment() */
                  int lines=rdql_skip_c_comment(rq);
                  if(lines < 0)
                    yyterminate();
                  rqe->lineno += lines;
                }

\r\n|\r|\n      { rqe->lineno++; }

[\ \t\v]+       { /* eat up other whitespace */
                  ;
                }

"SELECT"|"select"  { return SELECT; }
"SOURCE"|"source"  { return SOURCE; }
"FROM"|"from"      { return FROM; }
"WHERE"|"where"    { return WHERE; }
"AND"|"and"        { return AND; }
"USING"|"using"    { return USING; }
"FOR"|"for"        { return FOR; }

","             { return ','; }
"("             { return '('; }
")"             { return ')'; }
"?"             { BEGIN(ID); return '?'; }

"||"            { return SC_OR; }
"&&"            { return SC_AND; }

"EQ"|"eq"       { return STR_EQ; }
"NE"|"ne"       { /* second alternative was a duplicate "NE", so the
                   * lower-case spelling was never recognised
                   */
                  return STR_NE;
                }

"=~"|"~~"       { BEGIN(PATTERN); return STR_MATCH; }
"!~"            { BEGIN(PATTERN); return STR_NMATCH; }

<PATTERN>[ \t\v]+     { ; }

<PATTERN>\r\n|\r|\n   { rqe->lineno++; }

<PATTERN>.      { /* first non whitespace: it is the regex delimiter (or 'm') */
                  if(!rdql_copy_regex_token(rq, rdql_parser_lval, *yytext)) {
                    BEGIN(INITIAL);
                    return PATTERN_LITERAL;
                  }
                  BEGIN(INITIAL);
                  yyterminate();
                }

"=="            { return EQ; }
"!="            { return NEQ; }
"<"/[^A-Za-z=]  { return LT; }
">"             { return GT; }
"<="            { return LE; }
">="            { return GE; }

"+"             { return '+'; }
"-"             { return '-'; }
"*"             { return '*'; }
"/"             { return '/'; }
"%"             { return '%'; }
"~"             { return '~'; }
"!"             { return '!'; }

[0-9]+[lL]?     { /* decimal integer with optional l/L suffix.  The
                   * suffix class used to be ["lL"], which also accepted
                   * a double-quote character as a suffix.
                   */
                  c=yytext[yyleng-1];
                  if(c == 'l' || c == 'L')
                    yytext[yyleng-1]='\0';
                  rdql_parser_lval->literal=rasqal_new_integer_literal(rq->world, RASQAL_LITERAL_INTEGER, atoi(yytext));
                  return INTEGER_LITERAL;
                }

0[xX][0-9a-fA-F]+   { /* hexadecimal integer.  "%x" accepts digits of
                       * either case and requires an unsigned int*, so a
                       * single conversion handles both 0x and 0X.
                       */
                  unsigned int u=0;

                  if(sscanf(yytext+2, "%x", &u) != 1) {
                    rdql_syntax_error(rq, "RDQL syntax error - Illegal hex constant %c%c%c",
                                      yytext[0], yytext[1], yytext[2]);
                    yyterminate();
                  }
                  rdql_parser_lval->literal=rasqal_new_integer_literal(rq->world, RASQAL_LITERAL_INTEGER, (int)u);
                  return INTEGER_LITERAL;
                }

[0-9]+"."[0-9]*[eE][+-]?[0-9]+[fFdD]?|"."[0-9]+[eE][+-]?[0-9]+[fFdD]?|[0-9]+[eE][+-]?[0-9]+[fFdD]?|[0-9]+"."[0-9]*  { /* floating point; lexical form is kept as text */
                  rdql_parser_lval->literal=rasqal_new_typed_literal(rq->world, RASQAL_LITERAL_DOUBLE, (const unsigned char*)yytext);
                  if(!rdql_parser_lval->literal)
                    yyterminate();
                  return FLOATING_POINT_LITERAL;
                }

'([^'\\\n\r]|\\[^\n\r])*'(@{LANGUAGETOKEN})?(^^({QUOTEDURI}|{QNAME}))?  { /*' */
                  /* text passed on starts after the opening quote */
                  if(!rdql_copy_string_token(rq, rdql_parser_lval,
                                             (const unsigned char*)yytext+1, yyleng-1, '\''))
                    return STRING_LITERAL;
                  yyterminate();
                }

\"([^"\\\n\r]|\\[^\n\r])*\"(@{LANGUAGETOKEN})?(^^({QUOTEDURI}|{QNAME}))?  { /* " */
                  if(!rdql_copy_string_token(rq, rdql_parser_lval,
                                             (const unsigned char*)yytext+1, yyleng-1, '"'))
                    return STRING_LITERAL;
                  yyterminate();
                }

"true"|"false"  { rdql_parser_lval->literal=rasqal_new_boolean_literal(rq->world, *yytext == 't');
                  return BOOLEAN_LITERAL;
                }

"null"          { rdql_parser_lval->literal=NULL;
                  return NULL_LITERAL;
                }

<*>{NAME}       { rdql_parser_lval->name=rdql_copy_name(rq, (const unsigned char*)yytext, yyleng);
                  if(!rdql_parser_lval->name)
                    yyterminate();
                  BEGIN(INITIAL);
                  return IDENTIFIER;
                }

<ID>(.|\n)      { BEGIN(INITIAL);
                  rdql_syntax_error(rq, "RDQL syntax error - missing variable name after ?");
                  yyterminate();
                }

{QNAME}         { rdql_parser_lval->name=rdql_copy_qname(rq, (const unsigned char*)yytext, yyleng);
                  if(!rdql_parser_lval->name)
                    yyterminate();
                  return QNAME_LITERAL;
                }

\<{QNAME}\>     { /* obsolete <prefix:name> form: strip the angle brackets */
                  rdql_parser_lval->name=rdql_copy_qname(rq, (const unsigned char*)yytext+1, yyleng-2);
                  if(!rdql_parser_lval->name)
                    yyterminate();
                  rdql_syntax_warning(rq, "Obsolete RDQL syntax found in \"%s\"", (const unsigned char*)yytext);
                  return QNAME_LITERAL;
                }

{QUOTEDURI}     { if(yyleng == 2)
                    rdql_parser_lval->uri=raptor_uri_copy(rq->base_uri);
                  else {
                    /* overwrite the closing '>' and skip the opening '<' */
                    yytext[yyleng-1]='\0';
                    rdql_parser_lval->uri=raptor_new_uri_relative_to_base(rq->base_uri, (const unsigned char*)yytext+1);
                  }
                  return URI_LITERAL;
                }

\#              { /* # comment: discard the rest of the line.  input()
                   * signals end of input with 0 or EOF depending on the
                   * flex version, so both must stop the loop.
                   */
                  while((c=INPUT_FN(yyscanner)) != '\n' && c && c != EOF)
                    ;
                  if(c == '\n')
                    rqe->lineno++;
                }

.               { if(!*yytext)
                    return EOF;

                  rdql_syntax_error(rq, "RDQL syntax error at '%s'", yytext);
                  yyterminate();
                }

%%
  /* user code */

/*
 * yywrap:
 *
 * Called by the scanner at end of input; returning 1 means there is
 * no further input to switch to.
 */
int
yywrap (yyscan_t yyscanner)
{
  return 1;
}


/*
 * rdql_copy_name:
 * @rq: query, passed to the error/warning handlers
 * @text: name as scanned (may contain escapes)
 * @len: length of @text
 *
 * INTERNAL - decode a scanned name with
 * rasqal_escaped_name_to_utf8_string() and warn (not fail) if the
 * result does not pass raptor_xml_name_check().
 *
 * Return value: the decoded string, or NULL if decoding failed.
 */
static unsigned char *
rdql_copy_name(rasqal_query *rq, const unsigned char *text, size_t len)
{
  size_t dest_len=0;
  unsigned char *s;

  s=rasqal_escaped_name_to_utf8_string((unsigned char*)text, len,
                                       &dest_len,
                                       (raptor_simple_message_handler)rdql_syntax_error, rq);
  if(!s)
    return s;

  if(!raptor_xml_name_check(s, dest_len, 11))
    rdql_syntax_warning(rq, "Invalid RDQL name \"%s\"", s);

  return s;
}


/*
 * rdql_copy_qname:
 * @rq: query, passed to the error/warning handlers
 * @text: prefix:local-name as scanned (may contain escapes)
 * @len: length of @text
 *
 * INTERNAL - decode a scanned QName and warn (not fail) if the part
 * before or after the first ':' does not pass raptor_xml_name_check().
 *
 * Return value: the decoded string, or NULL if decoding failed.
 */
static unsigned char *
rdql_copy_qname(rasqal_query *rq, const unsigned char *text, size_t len)
{
  unsigned char *p;
  size_t dest_len=0;
  unsigned char *s;

  s=rasqal_escaped_name_to_utf8_string((unsigned char*)text, len,
                                       &dest_len,
                                       (raptor_simple_message_handler)rdql_syntax_error, rq);
  if(!s)
    return s;

  p=(unsigned char*)strchr((const char*)s, ':');
  if(!p) {
    /* No ':' in the decoded text: avoid pointer arithmetic on NULL and
     * report the whole string as an invalid name.
     */
    rdql_syntax_warning(rq, "Invalid RDQL name \"%s\"", s);
    return s;
  }

  if(!raptor_xml_name_check(s, p-s, 11))
    rdql_syntax_warning(rq, "Invalid RDQL name \"%s\"", s);
  if(!raptor_xml_name_check(p+1, dest_len-((p+1)-s), 11))
    rdql_syntax_warning(rq, "Invalid RDQL name \"%s\"", p+1);

  return s;
}


/*
 * rdql_copy_regex_token:
 * @rq: query (gives access to the scanner and error handler)
 * @lval: token value to fill; on success @lval->literal is set to the
 *   result of rasqal_new_pattern_literal()
 * @delim: first character of the pattern as scanned; if it is 'm' the
 *   next input character is used as the delimiter (m/foo/ form)
 *
 * INTERNAL - read a regex literal directly from the scanner input up
 * to the closing delimiter, followed by any alphabetic flag characters.
 * A backslash and the character after it are copied unchanged.
 *
 * NOTE(review): the character that ends the flags is read from the
 * input and cannot be pushed back (%option nounput), so it is lost to
 * the following token.
 *
 * Return value: 0 on success, non-0 on failure (unterminated regex or
 * out of memory); nothing is left allocated on failure.
 */
static int
rdql_copy_regex_token(rasqal_query* rq, YYSTYPE* lval, unsigned char delim)
{
  rasqal_rdql_query_engine *rqe=(rasqal_rdql_query_engine*)rq->context;
  yyscan_t yyscanner=rqe->scanner;
  unsigned int ind=0;
  size_t buffer_len=0;
  unsigned char *buffer=NULL;
  size_t flags_len=0;
  unsigned char *flags=NULL;
  int c;

  if(delim == 'm') {
    /* Handle pattern literal m/foo/ */
    c=INPUT_FN(yyscanner);
    if(!c || c == EOF) {
      rdql_syntax_error(rq, "RDQL syntax error - EOF in regex");
      return 1;
    }
    delim=(unsigned char)c;
  }

  while((c=INPUT_FN(yyscanner)) && c != EOF && c != delim) {
    /* May add 2 chars - \' */
    if(ind+2 > buffer_len) {
      unsigned char *new_buffer;
      size_t new_buffer_len=buffer_len << 1;

      if(new_buffer_len < 10)
        new_buffer_len=10;
      new_buffer=(unsigned char *)RASQAL_CALLOC(cstring, 1, new_buffer_len+1);
      if(!new_buffer)
        goto failed;
      if(buffer) {
        memcpy(new_buffer, buffer, ind);
        RASQAL_FREE(cstring, buffer);
      }
      buffer=new_buffer;
      buffer_len=new_buffer_len;
    }
    buffer[ind++]=(unsigned char)c;
    if(c == '\\') {
      c=INPUT_FN(yyscanner);
      /* input ended right after the backslash: do not store the
       * end marker, fall through to the unterminated check below
       */
      if(!c || c == EOF)
        break;
      buffer[ind++]=(unsigned char)c;
    }
  }

  /* The loop above only ends normally on the closing delimiter; any
   * other exit (0 or EOF from input()) is an unterminated regex.
   */
  if(c != delim) {
    rdql_syntax_error(rq, "RDQL syntax error - EOF in regex");
    goto failed;
  }

  if(!buffer) {
    /* empty pattern */
    buffer=(unsigned char *)RASQAL_CALLOC(cstring, 1, 1);
    if(!buffer)
      goto failed;
  }
  buffer[ind]='\0';

  /* flags */
  ind=0;
  while((c=INPUT_FN(yyscanner)) && c != EOF && isalpha(c)) {
    if(ind+1 > flags_len) {
      unsigned char *new_flags;
      size_t new_flags_len=flags_len + 5;

      new_flags=(unsigned char *)RASQAL_CALLOC(cstring, 1, new_flags_len+1);
      if(!new_flags)
        goto failed;
      if(flags) {
        memcpy(new_flags, flags, ind);
        RASQAL_FREE(cstring, flags);
      }
      flags=new_flags;
      flags_len=new_flags_len;
    }
    flags[ind++]=(unsigned char)c;
  }
  if(flags)
    flags[ind]='\0';

  lval->literal=rasqal_new_pattern_literal(rq->world, buffer, (const char*)flags);
  return 0;

  failed:
  if(buffer)
    RASQAL_FREE(cstring, buffer);
  if(flags)
    RASQAL_FREE(cstring, flags);
  return 1;
}


/*
 * rdql_copy_string_token:
 * @rq: query, passed to the error/warning handlers
 * @lval: token value to fill; on success @lval->literal is set to the
 *   result of rasqal_new_string_literal()
 * @text: scanned text starting AFTER the opening quote and running to
 *   the end of the token (closing quote, then optional @language and
 *   ^^datatype)
 * @len: length of @text
 * @delim: the quote character, ' or "
 *
 * INTERNAL - unescape a quoted string (\n \r \t \\ \<delim> \uXXXX
 * \UXXXXXXXX) and split off the optional language tag and datatype,
 * which may be a <URI> or a QName.
 *
 * Return value: 0 on success, non-0 on failure.
 */
static int
rdql_copy_string_token(rasqal_query* rq, YYSTYPE* lval,
                       const unsigned char *text, size_t len, int delim)
{
  unsigned int i;
  const unsigned char *s;
  unsigned char *d;
  unsigned char *string=(unsigned char *)RASQAL_MALLOC(cstring, len+1);
  char *language=NULL;
  unsigned char *dt=NULL;
  raptor_uri *dt_uri=NULL;
  unsigned char *dt_qname=NULL;

  if(!string)
    return 1;

  /* string, language and datatype are all first written into the
   * single 'string' buffer, separated by NULs, then copied out below
   */
  for(s=text, d=string, i=0; i<len; s++, i++) {
    unsigned char c=*s;

    if(c == '\\' ) {
      s++; i++;
      c=*s;
      if(c == 'n')
        *d++= '\n';
      else if(c == 'r')
        *d++= '\r';
      else if(c == 't')
        *d++= '\t';
      else if(c == '\\' || c == delim)
        *d++=c;
      else if(c == 'u' || c == 'U') {
        int ulen=(c == 'u') ? 4 : 8;
        unsigned long unichar=0;
        int n;

        s++; i++;
        if(i+ulen > len) {
          /* reported through the query error handler rather than
           * printed to stdout
           */
          rdql_syntax_error(rq, "RDQL syntax error - \\%c over end of line", c);
          RASQAL_FREE(cstring, string);
          return 1;
        }

        n=sscanf((const char*)s, ((ulen == 4) ? "%04lx" : "%08lx"), &unichar);
        if(n != 1) {
          rdql_syntax_error(rq, "RDQL syntax error - Illegal Unicode escape '%c%s...'", c, s);
          RASQAL_FREE(cstring, string);
          return 1;
        }

        /* the for-loop increment steps over the last hex digit */
        s+= ulen-1;
        i+= ulen-1;

        if(unichar > 0x10ffff) {
          rdql_syntax_error(rq, "RDQL syntax error - Illegal Unicode character with code point #x%lX.", unichar);
          RASQAL_FREE(cstring, string);
          return 1;
        }

        d+=raptor_unicode_char_to_utf8(unichar, d);
      } else {
        /* Ignore \x where x isn't the one of: \n \r \t \\ (delim) \u \U */
        rdql_syntax_warning(rq, "Unknown RDQL string escape \\%c in \"%s\"", c, text);
        *d++=c;
      }
    } else if(c == delim) {
      *d++='\0';

      /* skip delim */
      s++; i++;

      c=*s++; i++;
      if(c == '@') {
        language=(char*)d;
        while(i<=len) {
          c=*s++; i++;
          if(!isalpha(c) && !isdigit(c) && c != '-')
            break;
          *d++=c;
        }
        *d++='\0';
      }
      if(c == '^') {
        /* skip second char of ^^ */
        s++; i++;

        dt=d;
        while(i++<=len)
          *d++=*s++;
        /* *d='\0' below */
      } else if(language)
        *d='\0';
      break;
    } else
      *d++=c;
  } /* end of for */

  *d='\0';

  if(language) {
    /* give the language its own allocation */
    size_t language_len=strlen((const char*)language);
    char *new_language=(char *)RASQAL_MALLOC(cstring, language_len+1);
    if(!new_language) {
      RASQAL_FREE(cstring, string);
      return 1;
    }
    memcpy(new_language, language, language_len+1);
    language=new_language;
  }

  if(dt) {
    /* dt can be a URI or qname */
    if(*dt == '<') {
      /* drop the trailing '>' and skip the leading '<' */
      dt[strlen((const char*)dt)-1]='\0';
      dt_uri=raptor_new_uri(dt+1);
    } else {
      size_t dt_qname_len=strlen((const char*)dt);

      if(!raptor_xml_name_check(dt, dt_qname_len, 11))
        rdql_syntax_warning(rq, "Invalid RDQL name \"%s\"", dt);

      /* the qname is expanded later */
      dt_qname=(unsigned char *)RASQAL_MALLOC(cstring, dt_qname_len+1);
      if(!dt_qname) {
        if(language)
          RASQAL_FREE(cstring, language);
        RASQAL_FREE(cstring, string);
        return 1;
      }
      memcpy(dt_qname, dt, dt_qname_len+1);
    }
  }

#if RASQAL_DEBUG >3
  fprintf(stderr, "string='%s', language='%s'\n",
          string, (language ? language : ""));
  fprintf(stderr, "dt uri='%s',qname='%s'\n",
          (dt_uri ? (const char*)raptor_uri_as_string(dt_uri) : ""),
          (dt_qname ? (const char*)dt_qname : ""));
#endif

  lval->literal=rasqal_new_string_literal(rq->world, string, language, dt_uri, dt_qname);

  return 0;
}


/*
 * rdql_skip_c_comment:
 * @rq: query (gives access to the scanner and error handler)
 *
 * INTERNAL - consume scanner input up to and including the closing
 * star-slash of a C comment whose opening has already been matched.
 *
 * A line end is \r\n, \r or \n, the same as in the lexer rules, so a
 * \r\n pair counts once.  End of input is recognised as either 0 or
 * EOF from input(), since that differs between flex versions.
 *
 * Return value: number of line ends inside the comment, or -1 if the
 * input ended before the comment was closed (an error is reported).
 */
static int
rdql_skip_c_comment(rasqal_query *rq)
{
  rasqal_rdql_query_engine *rqe=(rasqal_rdql_query_engine*)rq->context;
  yyscan_t yyscanner=rqe->scanner;
  int lines=0;
  int prev=0;
  int c;

  while(1) {
    c=INPUT_FN(yyscanner);
    if(!c || c == EOF) {
      rdql_syntax_error(rq, "RDQL syntax error - EOF in comment");
      return -1;
    }

    if(c == '/' && prev == '*')
      break;

    if(c == '\r' || (c == '\n' && prev != '\r'))
      lines++;

    prev=c;
  }

  return lines;
}


/*
 * rdql_lexer_fatal_error:
 *
 * INTERNAL - replacement for the generated error handler.
 * Uses rasqal_query_fatal_error() when possible.
 *
 * This function always ends in abort(), so the longjmp()/abort() that
 * follows the call inside YY_FATAL_ERROR is never reached.
 */
static void rdql_lexer_fatal_error(yyconst char *msg, yyscan_t yyscanner)
{
  rasqal_query *rq=NULL;

  if(yyscanner)
    rq=(rasqal_query *)rdql_lexer_get_extra(yyscanner);

  if(rq) {
    /* avoid "format not a string literal and no format arguments" warning with %s */
    rq->failed=1;
    rasqal_log_error_simple(rq->world, RAPTOR_LOG_LEVEL_FATAL,
                            &rq->locator, "%s", msg);
  } else
    (void)fprintf(stderr, "%s\n", msg);

  abort();
}


/* Define LEXER_ALLOC_TRACKING to enable allocated memory tracking
 * - fixes lexer memory leak when ensure_buffer_stack fails
 */

#ifdef LEXER_ALLOC_TRACKING
typedef struct {
  /* Number of void* slots allocated */
  int lexer_allocs_size;
  /* Never read or written.  Its presence raises the size and alignment
   * of this header to that of a pointer, so the void* slot array that
   * users address as &tracker[1] is correctly aligned.
   */
  void *alignment_pad;
  /* Allocated void* slots follow in memory after this header */
} lexer_alloc_tracker_header;

/* Initial alloc tracker slot array size - 2 seems to be enough for almost all cases */
static const int initial_lexer_allocs_size=2;
#endif


/*
 * rdql_lexer_cleanup:
 * @yyscanner:
 *
 * INTERNAL - Clean up unfreed lexer allocs if LEXER_ALLOC_TRACKING is enabled.
*/
static void rdql_lexer_cleanup(yyscan_t yyscanner)
{
#ifdef LEXER_ALLOC_TRACKING
  rasqal_query *rq;
  lexer_alloc_tracker_header *tracker;
  void **lexer_allocs;
  int i;

  if(!yyscanner)
    return;

  rq=(rasqal_query *)rdql_lexer_get_extra(yyscanner);
  if(!rq)
    return;

  tracker=(lexer_alloc_tracker_header *)rq->lexer_user_data;
  if(!tracker)
    return;

  /* the slot array is stored directly after the header */
  lexer_allocs=(void**)&tracker[1];

  /* release every cell still recorded, then the tracker itself */
  for(i=0; i<tracker->lexer_allocs_size; ++i) {
    free(lexer_allocs[i]);
    lexer_allocs[i]=NULL;
  }
  free(rq->lexer_user_data);
  rq->lexer_user_data=NULL;
#endif
}


/*
 * rdql_lexer_alloc:
 * @size: number of bytes to allocate
 * @yyscanner: scanner; may be NULL while the scanner itself is being
 *   allocated
 *
 * INTERNAL - alloc replacement.
 * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled: the new
 * cell is recorded in the first empty tracker slot, and the tracker is
 * replaced by one twice the size when no slot is empty.
 *
 * Return value: the new cell, or NULL if malloc() failed.
 */
void *rdql_lexer_alloc(yy_size_t size, yyscan_t yyscanner)
{
#ifdef LEXER_ALLOC_TRACKING
  rasqal_query *rq;
  lexer_alloc_tracker_header *tracker;
  void **lexer_allocs;
  int i;
  void *ptr;

  /* yyscanner not initialized -> probably initializing yyscanner itself
   * -> just malloc without tracking
   */
  if(!yyscanner)
    return malloc(size);

  rq=(rasqal_query *)rdql_lexer_get_extra(yyscanner);
  if(!rq)
    YY_FATAL_ERROR("lexer_alloc: yyscanner extra not initialized");

  /* try to allocate tracker if it does not exist */
  tracker=(lexer_alloc_tracker_header *)rq->lexer_user_data;
  if(!tracker) {
    /* allocate tracker header + array of void* slots */
    tracker=calloc(1, sizeof(lexer_alloc_tracker_header)+initial_lexer_allocs_size*sizeof(void*));
    if(!tracker)
      YY_FATAL_ERROR("lexer_alloc: cannot allocate tracker");
    tracker->lexer_allocs_size=initial_lexer_allocs_size;
    rq->lexer_user_data=(void *)tracker;
  }
  lexer_allocs=(void**)&tracker[1];

  /* allocate memory */
  ptr=malloc(size);
  /* nothing to record for a failed allocation; in particular do not
   * grow the tracker just to store a NULL
   */
  if(!ptr)
    return NULL;

  /* find a free slot for ptr */
  for(i=0; i<tracker->lexer_allocs_size; ++i) {
    if(!lexer_allocs[i]) {
      lexer_allocs[i]=ptr;
      break;
    }
  }

  /* no free slots -> grow tracker slot array */
  if(i>=tracker->lexer_allocs_size) {
    int j;
    void **dest;

    tracker=calloc(1, sizeof(lexer_alloc_tracker_header)+i*2*sizeof(void*));
    if(!tracker) {
      free(ptr);
      YY_FATAL_ERROR("lexer_alloc: cannot grow tracker");
    }
    tracker->lexer_allocs_size=i*2;

    /* copy data from old tracker (lexer_allocs still points into it) */
    dest=(void**)&tracker[1];
    for(j=0; j<i; ++j)
      dest[j]=lexer_allocs[j];

    /* set new item to first free slot */
    dest[j]=ptr;

    /* free old tracker and replace with new one */
    free(rq->lexer_user_data);
    rq->lexer_user_data=tracker;
  }

  return ptr;
#else
  return malloc(size);
#endif
}


/*
 * rdql_lexer_realloc:
 * @ptr: cell previously returned by rdql_lexer_alloc()/rdql_lexer_realloc()
 * @size: new size in bytes
 * @yyscanner: scanner
 *
 * INTERNAL - realloc replacement
 * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled: @ptr must
 * already be in the tracker, and its slot is updated to the new cell.
 *
 * Return value: the resized cell, or NULL if realloc() failed.
 */
void *rdql_lexer_realloc(void *ptr, yy_size_t size, yyscan_t yyscanner)
{
#ifdef LEXER_ALLOC_TRACKING
  rasqal_query *rq;
  lexer_alloc_tracker_header *tracker;
  void **lexer_allocs;
  int i;
  void *newptr;

  if(!yyscanner)
    YY_FATAL_ERROR("lexer_realloc: yyscanner not initialized");

  rq=(rasqal_query *)rdql_lexer_get_extra(yyscanner);
  if(!rq)
    YY_FATAL_ERROR("lexer_realloc: yyscanner extra not initialized");

  tracker=(lexer_alloc_tracker_header *)rq->lexer_user_data;
  if(!tracker)
    YY_FATAL_ERROR("lexer_realloc: no alloc tracker");
  lexer_allocs=(void**)&tracker[1];

  /* find the old slot for ptr */
  for(i=0; i<tracker->lexer_allocs_size; ++i) {
    if(lexer_allocs[i]==ptr)
      break;
  }

  /* no old slot -> error */
  if(i>=tracker->lexer_allocs_size)
    YY_FATAL_ERROR("lexer_realloc: cell not in tracker");

  /* realloc */
  newptr=realloc((char*)ptr, size);

  /* replace entry in tracker.  When realloc() fails for a non-zero
   * size the old cell is still allocated, so it must stay recorded or
   * rdql_lexer_cleanup() would never release it.
   */
  if(newptr || !size)
    lexer_allocs[i]=newptr;

  return newptr;
#else
  return realloc((char*)ptr, size);
#endif
}


/*
 * rdql_lexer_free:
 *
 * INTERNAL - free replacement.
 * Checks for NULL pointer to be freed unlike the default lexer free function.
 * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled.
*/
void rdql_lexer_free(void *ptr, yyscan_t yyscanner)
{
#ifdef LEXER_ALLOC_TRACKING
  rasqal_query *rq;
  lexer_alloc_tracker_header *tracker;
  void **lexer_allocs;
  int i;
  int is_scanner;

  /* do not free NULL */
  if(!ptr)
    return;

  /* decide this before free() so the freed pointer value is not
   * inspected afterwards
   */
  is_scanner=(ptr == yyscanner);

  /* free ptr even if we would encounter an error */
  free(ptr);

  /* yyscanner is allocated with rdql_lexer_alloc() but it's never stored in the tracker
   * - we need yyscanner to access the tracker
   */
  if(!yyscanner || is_scanner)
    return;

  rq=(rasqal_query *)rdql_lexer_get_extra(yyscanner);
  if(!rq)
    return;

  tracker=(lexer_alloc_tracker_header *)rq->lexer_user_data;
  if(!tracker)
    return;
  lexer_allocs=(void**)&tracker[1];

  /* find the slot for ptr */
  for(i=0; i<tracker->lexer_allocs_size; ++i) {
    if(lexer_allocs[i]==ptr)
      break;
  }

  /* no slot -> error */
  if(i>=tracker->lexer_allocs_size)
    YY_FATAL_ERROR("lexer_free: cell not in tracker");

  /* remove entry from tracker */
  lexer_allocs[i]=NULL;
#else
  /* free(NULL) is a no-op */
  free(ptr);
#endif
}


#ifdef RASQAL_DEBUG

/*
 * rdql_token_print:
 * @token: token number returned by the lexer
 * @lval: token value set by the lexer for @token
 *
 * DEBUG - format a token for printing.
 *
 * Return value: a string constant, or a pointer to a static buffer
 * that is overwritten by the next call.  Aborts on an unknown token.
 */
const char *
rdql_token_print(int token, YYSTYPE *lval)
{
  /* Every %s written into this buffer carries a precision so that the
   * longest possible result stays below sizeof(buffer).
   */
  static char buffer[2048];

  if(!token)
    return "<<EOF>>";

  switch(token) {
    case SELECT:
      return "SELECT";

    case SOURCE:
      return "SOURCE";

    case FROM:
      return "FROM";

    case WHERE:
      return "WHERE";

    case AND:
      return "AND";

    case FOR:
      return "FOR";

    case ',':
      return ",";

    case '(':
      return "(";

    case ')':
      return ")";

    case '?':
      return "?";

    case USING:
      return "USING";

    case SC_AND:
      return "SC_AND";

    case SC_OR:
      return "SC_OR";

    case STR_NMATCH:
      return "STR_NMATCH";

    case STR_MATCH:
      return "STR_MATCH";

    case STR_NE:
      return "STR_NE";

    case STR_EQ:
      return "STR_EQ";

    case GE:
      return "GE";

    case LE:
      return "LE";

    case GT:
      return "GT";

    case LT:
      return "LT";

    case NEQ:
      return "NEQ";

    case EQ:
      return "EQ";

    case '%':
      return "%";

    case '/':
      return "/";

    case '*':
      return "*";

    case '-':
      return "-";

    case '+':
      return "+";

    case '!':
      return "!";

    case '~':
      return "~";

    case INTEGER_LITERAL:
      sprintf(buffer, "INTEGER_LITERAL(%d)", lval->literal->value.integer);
      return buffer;

    case FLOATING_POINT_LITERAL:
      /* the lexer stores a literal built from the token text for this
       * token, not a double, so print the literal's string
       */
      sprintf(buffer, "FLOATING_POINT_LITERAL(%.1000s)", lval->literal->string);
      return buffer;

    case STRING_LITERAL:
      if(lval->literal->language) {
        if(lval->literal->datatype)
          sprintf(buffer, "STRING_LITERAL(\"%.800s\"@%.100s^^%.1000s)",
                  lval->literal->string, lval->literal->language,
                  raptor_uri_as_string(lval->literal->datatype));
        else
          sprintf(buffer, "STRING_LITERAL(\"%.800s\"@%.100s)",
                  lval->literal->string, lval->literal->language);
      } else {
        if(lval->literal->datatype)
          sprintf(buffer, "STRING_LITERAL(\"%.800s\"^^%.1000s)",
                  lval->literal->string,
                  raptor_uri_as_string(lval->literal->datatype));
        else
          sprintf(buffer, "STRING_LITERAL(\"%.800s\")", lval->literal->string);
      }
      return buffer;

    case PATTERN_LITERAL:
      sprintf(buffer, "PATTERN_LITERAL(%.1000s,%.100s)", lval->literal->string,
              (lval->literal->flags ? (char*)lval->literal->flags : "-"));
      return buffer;

    case BOOLEAN_LITERAL:
      return (lval->literal->value.integer ? "BOOLEAN_LITERAL(true)" : "BOOLEAN_LITERAL(false)");

    case NULL_LITERAL:
      return "NULL_LITERAL";

    case URI_LITERAL:
      sprintf(buffer, "URI_LITERAL(%.1000s)", raptor_uri_as_string(lval->uri));
      return buffer;

    case QNAME_LITERAL:
      sprintf(buffer, "QNAME_LITERAL(%.1000s)", lval->name);
      return buffer;

    case IDENTIFIER:
      sprintf(buffer, "IDENTIFIER(%.1000s)", lval->name);
      return buffer;

    default:
      RASQAL_DEBUG2("UNKNOWN token %d - add a new case\n", token);
      abort();
  }
}
#endif



#ifdef STANDALONE

/*
 * rdql_token_free:
 * @token: token number returned by the lexer
 * @lval: token value set by the lexer for @token
 *
 * STANDALONE - release whatever the lexer allocated for a token.
 * Covers every token for which the lexer rules store an allocated
 * literal, URI or name.
 */
static void
rdql_token_free(int token, YYSTYPE *lval)
{
  if(!token)
    return;

  switch(token) {
    case INTEGER_LITERAL:
    case FLOATING_POINT_LITERAL:
    case BOOLEAN_LITERAL:
    case STRING_LITERAL:
    case PATTERN_LITERAL:
      if(lval->literal)
        rasqal_free_literal(lval->literal);
      break;

    case URI_LITERAL:
      if(lval->uri)
        raptor_free_uri(lval->uri);
      break;

    case IDENTIFIER:
    case QNAME_LITERAL:
      if(lval->name)
        RASQAL_FREE(cstring, lval->name);
      break;

    default:
      break;
  }
}


#define FILE_READ_BUF_SIZE 2048

/*
 * main:
 *
 * STANDALONE - test driver.  Reads up to FILE_READ_BUF_SIZE-1 bytes of
 * query text from the file named by the first argument (or stdin),
 * runs the lexer over it and prints each token.
 *
 * Return value: 1 if the query was marked failed, otherwise 0.
 */
int
main(int argc, char *argv[])
{
  const char *program=rasqal_basename(argv[0]);
  char *query_string=NULL;
  rasqal_query rq;
  rasqal_rdql_query_engine rdql;
  yyscan_t scanner;
  int token=EOF;
  YYSTYPE lval;
  const unsigned char *uri_string;
  const char *filename=NULL;
  char *buf=NULL;
  size_t len;
  FILE *fh=stdin;
  rasqal_world *world;

  world=rasqal_new_world();
  if(!world) {
    fprintf(stderr, "%s: Cannot create rasqal world\n", program);
    exit(1);
  }

  if(argc > 1) {
    filename=argv[1];
    fh=fopen(filename, "r");
    if(!fh) {
      fprintf(stderr, "%s: Cannot open file %s - %s\n", program, filename,
              strerror(errno));
      exit(1);
    }
  } else
    filename="<stdin>";

  query_string=(char*)RASQAL_CALLOC(cstring, FILE_READ_BUF_SIZE, 1);
  if(!query_string) {
    fprintf(stderr, "%s: Out of memory\n", program);
    exit(1);
  }

  /* Read at most FILE_READ_BUF_SIZE-1 bytes so that the zeroed buffer
   * always keeps a terminating NUL for the strlen() below.
   */
  len=fread(query_string, 1, FILE_READ_BUF_SIZE-1, fh);
  query_string[len]='\0';
  if(fh != stdin)
    fclose(fh);

  memset(&rq, 0, sizeof(rasqal_query));
  rq.world=world;
  memset(&rdql, 0, sizeof(rasqal_rdql_query_engine));

  if(yylex_init(&rdql.scanner)) {
    fprintf(stderr, "%s: Cannot initialise scanner\n", program);
    exit(1);
  }
  scanner=rdql.scanner;

  /* The allocator replacements look up the query through the scanner
   * extra, so set it before anything allocates via the scanner.
   */
  rdql_lexer_set_extra(&rq, scanner);

  /* Initialise enough of the rasqal_query and locator to get error messages */
  rq.context=&rdql;
  rdql.lineno=1;
  rq.locator.file=filename;
  rq.locator.column= -1;

#if 0
  /* set
   *   %option debug
   * above first before enabling this
   */
  rdql_lexer_set_debug(1, scanner);
#endif

  /* scan_buffer needs the text followed by two NULs; a space is added
   * after the query text first
   */
  len= strlen((const char*)query_string);
  buf= (char *)RASQAL_MALLOC(cstring, len+3);
  if(!buf) {
    fprintf(stderr, "%s: Out of memory\n", program);
    exit(1);
  }
  memcpy(buf, query_string, len);
  buf[len]= ' ';
  buf[len+1]= buf[len+2]='\0'; /* YY_END_OF_BUFFER_CHAR; */
  (void)rdql_lexer__scan_buffer(buf, len+3, scanner);

  uri_string=raptor_uri_filename_to_uri_string(filename);
  rq.base_uri=raptor_new_uri(uri_string);
  raptor_free_memory((void*)uri_string);

  while(1) {
    memset(&lval, 0, sizeof(YYSTYPE));
    if(rdql_lexer_get_text(scanner) != NULL)
      printf("yyinput '%s'\n", rdql_lexer_get_text(scanner));
    token=yylex(&lval, scanner);
#ifdef RASQAL_DEBUG
    printf("token %s\n", rdql_token_print(token, &lval));
#else
    printf("token %d\n", token);
#endif
    rdql_token_free(token, &lval);
    if(!token || token == EOF)
      break;
  }

  if(buf)
    RASQAL_FREE(cstring, buf);

  yylex_destroy(scanner);

  raptor_free_uri(rq.base_uri);

  RASQAL_FREE(cstring, query_string);

  rasqal_free_world(world);

  if(rq.failed)
    return 1;

  return 0;
}
#endif