00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
%option yylineno

%{

#include <sstream>
#include <vector>
#include <list>

/*
 * The using-directive comes after the standard headers so that namespace std
 * has been declared when it is named, and before the project headers so that
 * they still see it exactly as they did before.
 */
using namespace std;

#include "typeenums.h"
#include "parseexception.h"


/* Forward declarations of the expression classes; only pointers to them are
 * needed in this file. */
class Expression;
class PathExpression;
class PathStepExpression;
class VarExpression;
class ConstExpression;
class CondExpression;
class OperandExpression;
class VarStepExpression;
class AggregateFunctExpression;
class RoundingExpression;

/**
 * List of (variable expression, expression) pointer pairs.
 * NOTE(review): presumably required by the declarations in query_parser.h,
 * which is included right below -- confirm.
 */
typedef list< pair<VarExpression*, Expression*> > var_list_type;

#include "query_parser.h"

/**
 * Reports a scanning/parsing error by throwing a ParseException that carries
 * the current line number and the offending text.
 *
 * @param text description of the error
 */
void yyerror(const char *text);

/**
 * Stack of start conditions saved by push_state() and restored by pop_state().
 * NOTE(review): the name collides with std::stack if <stack> is ever included
 * while the using-directive above is active.
 */
vector<int> stack;

/** Current nesting depth of "(: ... :)" comments. */
int comment_level = 0;

/** Accumulates the text of the comment that is currently being scanned. */
ostringstream comment;

/**
 * Saves a start condition on the state stack.
 *
 * @param state start condition to resume on the matching pop_state()
 */
void push_state(int state);

/** Restores the most recently saved start condition. */
void pop_state();

/**
 * Returns a newly allocated copy of a quoted string literal without its
 * first and last character (the quotes).
 *
 * @param text the literal including its delimiters
 * @return     buffer allocated with new[]
 */
char * extract_const_string(const char * text);

%}
00117
00118
00119
00120
00121
space [ \t\r\n]
    /* NOTE: {whitespace} (and therefore {WS}) also matches the empty string. */
whitespace {space}*
whitespaceplus {space}+
WS {whitespace}



base_char [a-zA-Z]
    /* NOTE(review): single quotes do not quote in flex and \183 is not a valid
       octal escape, so this does not match character 183 (probably the intent).
       Left unchanged; confirm the intended character and input encoding. */
extender '\183'
digit [0-9]



letter {base_char}
    /* The underscore must be quoted with double quotes: '_' would match the
       three-character sequence quote-underscore-quote (and, being listed before
       the string literal rule, made the literal '_' scan as a QNAME). */
nmstart {letter}|"_"
nmchar {letter}|{extender}|{digit}|"."|"-"|"_"

ncname {nmstart}{nmchar}*
qname {ncname}
varname {qname}



    /* Numeric character reference, decimal or hexadecimal. */
CharRef "&#"[0-9]+";"|"&#x"[0-9a-fA-F]+";"

    /* One of the five predefined entity references. */
PredefinedEntityRef "&"("lt"|"gt"|"amp"|"quot"|"apos")";"

    /* Double- or single-quoted literal; a doubled delimiter stands for the
       delimiter itself, '&' is only allowed as part of a reference. */
StringLiteral ("\""({PredefinedEntityRef}|{CharRef}|("\"\"")|[^"&])*"\"")|("'"({PredefinedEntityRef}|{CharRef}|("''")|[^'&])*"'")

NumericLiteral [0-9]+(\.[0-9]+)?

    /* NOTE(review): the second alternative is two apostrophes, not one, so an
       unterminated single-quoted string is not covered -- confirm intent. */
UnterminatedString (\"|"''")[^\"\n]*$

    /* Text between tags; "{{" and "}}" are the only way braces may appear. */
elementcontent ({CharRef}|{PredefinedEntityRef}|[^{}<&]|"{{"|"}}")*


%x COMMENT OPERATOR S_VARNAME START_TAG ELEMENT_CONTENT END_TAG
%x QUOT_ATTRIBUTE_CONTENT APOS_ATTRIBUTE_CONTENT
00160
00161 %%
00162
"fn:doc"                  { /* function names: report the token, keep the state */ return DOC; }
"fn:not"                  { return NOT; }
"fn:exists"               { return EXISTS; }
"fn:empty"                { return EMPTY; }

    /* Boolean constants are complete operands, so an operator is expected next. */
"fn:true()"|"true()"      { BEGIN(OPERATOR); return XTRUE; }
"fn:false()"|"false()"    { BEGIN(OPERATOR); return XFALSE; }

    /* Aggregate functions. */
"fn:sum"                  { return SUM; }
"fn:avg"                  { return AVG; }
"fn:min"                  { return MIN; }
"fn:max"                  { return MAX; }
"fn:count"                { return COUNT; }
"fn:stddev_samp"          { return STDDEV_SAMP; }
"fn:stddev_pop"           { return STDDEV_POP; }
"fn:var_samp"             { return VAR_SAMP; }
"fn:var_pop"              { return VAR_POP; }
"fn:median"               { return MEDIAN; }

    /* Numeric functions. */
"fn:abs"                  { return ABS; }
"fn:ceiling"              { return CEILING; }
"fn:cover"                { return COVER; }
"fn:floor"                { return FLOOR; }
"fn:round"                { return ROUND; }
"fn:round-half-to-even"   { return ROUNDHALFTOEVEN; }
"fn:truncate"             { return TRUNCATE; }
00256
"text()"            { /* node tests end an operand */ BEGIN(OPERATOR); return XP_TEXT; }
"node()"            { BEGIN(OPERATOR); return XP_NODE; }

    /* Comment opener: count the nesting level and collect the text in COMMENT. */
"(:"                { ++comment_level; BEGIN(COMMENT); return COMMENT_START; }

    /* A closer outside of any comment is an error. */
":)"                { yyerror("Comment Not Properly Opened"); }

{whitespaceplus}    { /* ignore whitespace */ }

    /* Names and literals hand a freshly allocated copy of their text to the
       parser through yylval.str and are followed by an operator. */
{qname}             { yylval.str = strdup(yytext); BEGIN(OPERATOR); return QNAME; }
{StringLiteral}     { yylval.str = extract_const_string(yytext); BEGIN(OPERATOR); return STRINGCONST; }
{NumericLiteral}    { yylval.str = strdup(yytext); BEGIN(OPERATOR); return NUMERICCONST; }
00298
")"     { /* a closing bracket or wildcard completes an operand */ BEGIN(OPERATOR); return BCLOSE; }
"*"     { BEGIN(OPERATOR); return NT_STAR; }

    /* The variable name itself is scanned in S_VARNAME. */
"$"     { BEGIN(S_VARNAME); return VAR_SIGN; }

    /* "for" is only a keyword when a variable follows; the '$' is pushed back
       so that it is scanned as VAR_SIGN by the rule above. */
"for"{whitespaceplus}"$" {
    unput('$');
    return FOR;
}

    /* Start of an element constructor: OPERATOR is resumed once it is closed. */
"<"     { push_state(OPERATOR); BEGIN(START_TAG); return RELOP_LT; }

    /* Braces save and restore the surrounding start condition. */
"}"     { pop_state(); return CBCLOSE; }
"{"     { push_state(OPERATOR); return CBOPEN; }

","     { return COMMA; }
"("     { return BOPEN; }

    /* "if" is only a keyword when a bracket follows; the '(' is pushed back
       so that it is returned as BOPEN afterwards. */
"if"{whitespace}"(" {
    unput('(');
    return IF;
}

"/"     { return SLASH; }
00352
"child::" {
    /* explicit child axis */
    return CHILD;
}

"//" {
    /* The pattern of this rule had been truncated to a lone quote character;
       it is the INITIAL-state counterpart of the "//" rule in OPERATOR, which
       returns the same token. */
    return DSLASH;
}

"descendant::" {
    /* explicit descendant axis */
    return DESCENDANT;
}
<COMMENT>{
    /* Comment body. The text between the outermost "(:" and ":)" is collected
       in the global `comment` buffer; nested comments are kept verbatim. */

    ":)" {
        comment_level--;
        if (comment_level <= 0) {
            /* Outermost comment closed: hand a malloc'ed copy of the text to
               the parser. */
            yylval.str = strdup(comment.str().c_str());

            /* Empty the buffer; without this every later comment would also
               contain the text of all comments scanned before it. */
            comment.str("");

            /* NOTE(review): scanning always resumes in INITIAL, even when the
               comment was opened from OPERATOR -- confirm that is intended. */
            BEGIN(INITIAL);
            return COMMENTCONTENT;
        } else {
            /* Closer of a nested comment: part of the collected text. */
            comment << yytext;
        }
    }

    "(:" {
        comment << yytext;
        comment_level++;
    }

    \n {
        comment << yytext;
    }

    . {
        comment << yytext;
    }

    {StringLiteral} {
        /* Taken as a whole, so comment delimiters inside a string literal do
           not change the nesting level. */
        comment << yytext;
    }

    <<EOF>> {
        /* yyerror() throws, so reset the comment bookkeeping first; otherwise
           stale text and a stale nesting level would leak into the next scan. */
        comment.str("");
        comment_level = 0;
        BEGIN(INITIAL);
        yyerror("Comment(s) Not Properly Closed At End Of File");
    }
}
00398
<OPERATOR>{
    /* State after a complete operand. Most tokens recognised here expect a new
       operand afterwards and therefore switch back to INITIAL. */

    "(:"            { ++comment_level; BEGIN(COMMENT); return COMMENT_START; }

    /* OPERATOR is resumed when the matching "}" is reached. */
    "{"             { push_state(OPERATOR); BEGIN(INITIAL); return CBOPEN; }

    "then"          { BEGIN(INITIAL); return THEN; }
    "else"          { BEGIN(INITIAL); return ELSE; }
    "and"           { BEGIN(INITIAL); return AND; }
    ","             { BEGIN(INITIAL); return COMMA; }

    "="             { BEGIN(INITIAL); return RELOP_EQ; }
    "!="            { BEGIN(INITIAL); return RELOP_NEQ; }
    "<"             { BEGIN(INITIAL); return RELOP_LT; }
    ">"             { BEGIN(INITIAL); return RELOP_GT; }
    "<="            { BEGIN(INITIAL); return RELOP_LEQ; }
    ">="            { BEGIN(INITIAL); return RELOP_GEQ; }

    "in"            { BEGIN(INITIAL); return IN; }
    "*"             { BEGIN(INITIAL); return NT_STAR; }
    "or"            { BEGIN(INITIAL); return OR; }
    "where"         { BEGIN(INITIAL); return WHERE; }
    "return"        { BEGIN(INITIAL); return XRETURN; }

    "/"             { BEGIN(INITIAL); return SLASH; }
    "child::"       { BEGIN(INITIAL); return CHILD; }
    "//"            { BEGIN(INITIAL); return DSLASH; }
    "descendant::"  { BEGIN(INITIAL); return DESCENDANT; }

    /* Back to whatever start condition was saved by the matching opener. */
    "}"             { pop_state(); return CBCLOSE; }

    "$"             { BEGIN(S_VARNAME); return VAR_SIGN; }

    /* The '$' is pushed back and scanned again as VAR_SIGN. */
    "for"{whitespaceplus}"$" {
        unput('$');
        return FOR;
    }

    /* The remaining tokens leave the start condition unchanged. */
    ")"             { return BCLOSE; }

    {StringLiteral} {
        yylval.str = extract_const_string(yytext);
        return STRINGCONST;
    }

    {NumericLiteral} {
        yylval.str = strdup(yytext);
        return NUMERICCONST;
    }

    {WS}            { /* whitespace is skipped */ }
}
00541
<S_VARNAME>{varname} {
    /* Name following a '$': pass a malloc'ed copy to the parser; a variable
       reference is an operand, so an operator is expected next. */
    yylval.str = strdup(yytext);
    BEGIN(OPERATOR);
    return VARNAME;
}
00547
<START_TAG>{
    /* Inside an opening tag, after its '<'. */

    /* Empty element: resume the start condition saved when the tag began. */
    "/>"        { pop_state(); return GTSLASH; }

    /* End of the opening tag: element content follows. */
    ">"         { BEGIN(ELEMENT_CONTENT); return RELOP_GT; }

    /* Attributes are recognised but rejected; yyerror() throws. */
    "\""        { BEGIN(QUOT_ATTRIBUTE_CONTENT); yyerror("Feature \"Attributes\" Not Yet Implemented"); }
    "'"         { BEGIN(APOS_ATTRIBUTE_CONTENT); yyerror("Feature \"Attributes\" Not Yet Implemented"); }
    "="         { yyerror("Feature \"Attributes\" Not Yet Implemented"); }

    {WS}        { /* whitespace is skipped */ }

    /* Tag name, passed to the parser as a malloc'ed copy. */
    {qname}     { yylval.str = strdup(yytext); return QNAME; }
}
00582
<ELEMENT_CONTENT>{
    /* Between an opening tag and its closing tag. */

    "</" {
        BEGIN(END_TAG);
        return LTSLASH;
    }

    "{" {
        /* Enclosed expression: ELEMENT_CONTENT is resumed at the matching "}". */
        push_state(ELEMENT_CONTENT);
        BEGIN(0);
        return CBOPEN;
    }

    "<" {
        /* Nested element: ELEMENT_CONTENT is resumed once it is closed. */
        push_state(ELEMENT_CONTENT);
        BEGIN(START_TAG);
        return RELOP_LT;
    }

    {WS} {
        /* whitespace-only content is skipped */
    }

    {elementcontent} {
        /* Literal text. "{{" and "}}" are escapes for a single brace, so the
           second character of each such pair is dropped. The length is taken
           once instead of calling strlen() on every iteration, and an empty
           match can no longer write past the end of the buffer.
           NOTE(review): the result is allocated with new[] while the name
           tokens use strdup() -- the consumer has to release it accordingly. */
        const size_t length = strlen(yytext);
        char * unescaped = new char[length + 1];
        size_t out = 0;

        for (size_t in = 0; in < length; ++in) {
            const char c = yytext[in];
            unescaped[out++] = c;
            if ((c == '{' || c == '}') && in + 1 < length && yytext[in + 1] == c) {
                ++in;   /* skip the second brace of the escape */
            }
        }
        unescaped[out] = '\0';

        yylval.str = unescaped;
        return ELEMENTCONTENT;
    }
}
00629
<END_TAG>{
    /* Inside a closing tag, after its "</". */

    /* Tag finished: resume the start condition saved when the element began. */
    ">"         { pop_state(); return RELOP_GT; }

    {WS}        { /* whitespace is skipped */ }

    /* Tag name, passed to the parser as a malloc'ed copy. */
    {qname}     { yylval.str = strdup(yytext); return QNAME; }
}
00645
<*>.                        { /* any character no other rule accepts */ yyerror("Call Error"); }

<*>{UnterminatedString}     { /* string opened but not closed on its line */ yyerror("Unterminated String"); }
00654
00655 %%
00656
/**
 * Called by the scanner when it reaches the end of its input.
 *
 * @return always 1, i.e. there is no further input to continue with
 */
int yywrap(void)
{
    const int no_more_input = 1;
    return no_more_input;
}
00665
00671 void pop_state() {
00672 int resume_state = stack.back();
00673 stack.pop_back();
00674 BEGIN(resume_state);
00675 }
00676
00683 void push_state(int state) {
00684 stack.push_back(state);
00685 }
00686
00687 #include <iostream>
00688
/**
 * Returns a copy of a quoted string literal without its first and last
 * character (the delimiters). Escapes inside the literal are copied as they
 * are.
 *
 * The copy is taken from the text argument; the previous version ignored the
 * parameter and read from the global yytext instead. Input shorter than two
 * characters yields an empty string instead of an out-of-range length.
 *
 * @param text the literal including its delimiters
 * @return     NUL-terminated buffer allocated with new[]; the caller owns it
 */
char * extract_const_string(const char * text) {
    const size_t length = strlen(text);
    const size_t inner = (length >= 2) ? length - 2 : 0;

    char * str = new char[inner + 1];
    if (inner > 0) {
        memcpy(str, text + 1, inner);
    }
    str[inner] = '\0';

    return str;
}
00703
00710 void yyerror(const char *text) {
00711 std::ostringstream o;
00712 o << "Query Parsing In Line " << yylineno << " => " << text;
00713 if (yytext && (int)yytext!=EOF && strcmp(yytext, "")!=0) {
00714 o << " (\"" << yytext << "\")";
00715 }
00716 throw ParseException(o.str().c_str(), eid_parse_query);
00717 }