Commit 32ede45d authored by unknown's avatar unknown

Bug#20854 XML functions: wrong result in ExtractValue


mysql-test/r/xml.result:
  - Adding test case
  - Fixing error message
mysql-test/t/xml.test:
  Adding test case
sql/item_xmlfunc.cc:
  For grammar rules with loops like:
  
    AdditiveExpr ::= MultiplicativeExpr ('+' MultiplicativeExpr)*
  
  If we scanned '+' and then met an error when parsing
  MultiplicativeExpr, then we should fully stop parsing - without
  trying to apply any other rules.
  
  Fix: add an "error" member to the MY_XPATH structure,
  and make my_xpath_parse_term() never return success
  once the error flag is set.
strings/xml.c:
  Adding my_xml_ctype map for flags, indicating
  whether a character is a space character, is a
  valid identifier start character, is a valid
  identifier body character. Using this map to
  properly scan identifiers. Also, using this map
  to scan spaces faster (instead of strchr).
parent 9e89ea6f
...@@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b'); ...@@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b');
extractvalue('<a>a<b>B</b></a>','a|/b') extractvalue('<a>a<b>B</b></a>','a|/b')
a a
select extractvalue('<a>A</a>','/<a>'); select extractvalue('<a>A</a>','/<a>');
ERROR HY000: XPATH syntax error: '<a>' ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>'
select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!'); select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!');
ERROR HY000: XPATH syntax error: '!' ERROR HY000: XPATH syntax error: '!'
select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*'); select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*');
...@@ -710,3 +710,29 @@ Data ...@@ -710,3 +710,29 @@ Data
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something') extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something')
Otherdata Otherdata
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
ERROR HY000: XPATH syntax error: '02'
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*')
NULL
Warnings:
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)'
select extractValue('<.>test</.>','//*');
extractValue('<.>test</.>','//*')
NULL
Warnings:
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
select extractValue('<->test</->','//*');
extractValue('<->test</->','//*')
NULL
Warnings:
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
select extractValue('<:>test</:>','//*');
extractValue('<:>test</:>','//*')
test
select extractValue('<_>test</_>','//*');
extractValue('<_>test</_>','//*')
test
select extractValue('<x.-_:>test</x.-_:>','//*');
extractValue('<x.-_:>test</x.-_:>','//*')
test
...@@ -360,3 +360,19 @@ select extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x ...@@ -360,3 +360,19 @@ select extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x
# #
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar'); select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar');
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something'); select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
#
# Bug#20854 XML functions: wrong result in ExtractValue
#
--error 1105
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
# dot and dash are bad identifier start characters
select extractValue('<.>test</.>','//*');
select extractValue('<->test</->','//*');
# colon is a good identifier start character
select extractValue('<:>test</:>','//*');
# underscore is good identifier start character
select extractValue('<_>test</_>','//*');
# dot, dash, underscore and colon are good identifier middle characters
select extractValue('<x.-_:>test</x.-_:>','//*');
...@@ -105,6 +105,7 @@ typedef struct my_xpath_st ...@@ -105,6 +105,7 @@ typedef struct my_xpath_st
String *context_cache; /* last context provider */ String *context_cache; /* last context provider */
String *pxml; /* Parsed XML, an array of MY_XML_NODE */ String *pxml; /* Parsed XML, an array of MY_XML_NODE */
CHARSET_INFO *cs; /* character set/collation string comparison */ CHARSET_INFO *cs; /* character set/collation string comparison */
int error;
} MY_XPATH; } MY_XPATH;
...@@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b) ...@@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b)
RETURN RETURN
The newly created item. The newly created item.
*/ */
static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) static Item *create_comparator(MY_XPATH *xpath,
int oper, MY_XPATH_LEX *context,
Item *a, Item *b)
{ {
if (a->type() != Item::XPATH_NODESET && if (a->type() != Item::XPATH_NODESET &&
b->type() != Item::XPATH_NODESET) b->type() != Item::XPATH_NODESET)
...@@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b) ...@@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
else if (a->type() == Item::XPATH_NODESET && else if (a->type() == Item::XPATH_NODESET &&
b->type() == Item::XPATH_NODESET) b->type() == Item::XPATH_NODESET)
{ {
uint len= context->end - context->beg;
set_if_bigger(len, 32);
my_printf_error(ER_UNKNOWN_ERROR,
"XPATH error: "
"comparison of two nodesets is not supported: '%.*s'",
MYF(0), len, context->beg);
return 0; // TODO: Comparison of two nodesets return 0; // TODO: Comparison of two nodesets
} }
else else
...@@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath, ...@@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath,
static int static int
my_xpath_parse_term(MY_XPATH *xpath, int term) my_xpath_parse_term(MY_XPATH *xpath, int term)
{ {
if (xpath->lasttok.term == term) if (xpath->lasttok.term == term && !xpath->error)
{ {
xpath->prevtok= xpath->lasttok; xpath->prevtok= xpath->lasttok;
my_xpath_lex_scan(xpath, &xpath->lasttok, my_xpath_lex_scan(xpath, &xpath->lasttok,
...@@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath) ...@@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath)
return my_xpath_parse_RelativeLocationPath(xpath); return my_xpath_parse_RelativeLocationPath(xpath);
} }
return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) || my_xpath_parse_RelativeLocationPath(xpath);
my_xpath_parse_RelativeLocationPath(xpath);
return (xpath->error == 0);
} }
...@@ -1596,7 +1607,10 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath) ...@@ -1596,7 +1607,10 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath)
"*", 1, "*", 1,
xpath->pxml, 1); xpath->pxml, 1);
if (!my_xpath_parse_Step(xpath)) if (!my_xpath_parse_Step(xpath))
{
xpath->error= 1;
return 0; return 0;
}
} }
return 1; return 1;
} }
...@@ -1633,10 +1647,16 @@ my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath) ...@@ -1633,10 +1647,16 @@ my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath)
xpath->context_cache= context_cache; xpath->context_cache= context_cache;
if(!my_xpath_parse_PredicateExpr(xpath)) if(!my_xpath_parse_PredicateExpr(xpath))
{
xpath->error= 1;
return 0; return 0;
}
if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB)) if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB))
{
xpath->error= 1;
return 0; return 0;
}
xpath->item= nodeset2bool(xpath, xpath->item); xpath->item= nodeset2bool(xpath, xpath->item);
...@@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath) ...@@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath)
if (!my_xpath_parse_PathExpr(xpath) if (!my_xpath_parse_PathExpr(xpath)
|| xpath->item->type() != Item::XPATH_NODESET) || xpath->item->type() != Item::XPATH_NODESET)
{
xpath->error= 1;
return 0; return 0;
}
xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml); xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml);
} }
return 1; return 1;
...@@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath) ...@@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath)
{ {
return my_xpath_parse_LocationPath(xpath) || return my_xpath_parse_LocationPath(xpath) ||
my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath); my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath);
} }
...@@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath) ...@@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath)
{ {
Item *prev= xpath->item; Item *prev= xpath->item;
if (!my_xpath_parse_AndExpr(xpath)) if (!my_xpath_parse_AndExpr(xpath))
{
return 0; return 0;
xpath->error= 1;
}
xpath->item= new Item_cond_or(nodeset2bool(xpath, prev), xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
nodeset2bool(xpath, xpath->item)); nodeset2bool(xpath, xpath->item));
} }
...@@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath) ...@@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath)
{ {
Item *prev= xpath->item; Item *prev= xpath->item;
if (!my_xpath_parse_EqualityExpr(xpath)) if (!my_xpath_parse_EqualityExpr(xpath))
{
xpath->error= 1;
return 0; return 0;
}
xpath->item= new Item_cond_and(nodeset2bool(xpath,prev), xpath->item= new Item_cond_and(nodeset2bool(xpath,prev),
nodeset2bool(xpath,xpath->item)); nodeset2bool(xpath,xpath->item));
...@@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath) ...@@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath)
} }
static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath) static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath)
{ {
MY_XPATH_LEX operator_context;
if (!my_xpath_parse_RelationalExpr(xpath)) if (!my_xpath_parse_RelationalExpr(xpath))
return 0; return 0;
operator_context= xpath->lasttok;
while (my_xpath_parse_EqualityOperator(xpath)) while (my_xpath_parse_EqualityOperator(xpath))
{ {
Item *prev= xpath->item; Item *prev= xpath->item;
int oper= xpath->extra; int oper= xpath->extra;
if (!my_xpath_parse_RelationalExpr(xpath)) if (!my_xpath_parse_RelationalExpr(xpath))
{
xpath->error= 1;
return 0; return 0;
}
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
prev, xpath->item)))
return 0; return 0;
operator_context= xpath->lasttok;
} }
return 1; return 1;
} }
...@@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath) ...@@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath)
} }
static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath) static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath)
{ {
MY_XPATH_LEX operator_context;
if (!my_xpath_parse_AdditiveExpr(xpath)) if (!my_xpath_parse_AdditiveExpr(xpath))
return 0; return 0;
operator_context= xpath->lasttok;
while (my_xpath_parse_RelationalOperator(xpath)) while (my_xpath_parse_RelationalOperator(xpath))
{ {
Item *prev= xpath->item; Item *prev= xpath->item;
int oper= xpath->extra; int oper= xpath->extra;
if (!my_xpath_parse_AdditiveExpr(xpath)) if (!my_xpath_parse_AdditiveExpr(xpath))
{
xpath->error= 1;
return 0; return 0;
}
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item))) if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
prev, xpath->item)))
return 0; return 0;
operator_context= xpath->lasttok;
} }
return 1; return 1;
} }
...@@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath) ...@@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath)
int oper= xpath->prevtok.term; int oper= xpath->prevtok.term;
Item *prev= xpath->item; Item *prev= xpath->item;
if (!my_xpath_parse_MultiplicativeExpr(xpath)) if (!my_xpath_parse_MultiplicativeExpr(xpath))
{
xpath->error= 1;
return 0; return 0;
}
if (oper == MY_XPATH_LEX_PLUS) if (oper == MY_XPATH_LEX_PLUS)
xpath->item= new Item_func_plus(prev, xpath->item); xpath->item= new Item_func_plus(prev, xpath->item);
...@@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath) ...@@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath)
int oper= xpath->prevtok.term; int oper= xpath->prevtok.term;
Item *prev= xpath->item; Item *prev= xpath->item;
if (!my_xpath_parse_UnaryExpr(xpath)) if (!my_xpath_parse_UnaryExpr(xpath))
{
xpath->error= 1;
return 0; return 0;
}
switch (oper) switch (oper)
{ {
case MY_XPATH_LEX_ASTERISK: case MY_XPATH_LEX_ASTERISK:
......
...@@ -19,6 +19,7 @@ ...@@ -19,6 +19,7 @@
#include "my_xml.h" #include "my_xml.h"
#define MY_XML_UNKNOWN 'U'
#define MY_XML_EOF 'E' #define MY_XML_EOF 'E'
#define MY_XML_STRING 'S' #define MY_XML_STRING 'S'
#define MY_XML_IDENT 'I' #define MY_XML_IDENT 'I'
...@@ -39,6 +40,46 @@ typedef struct xml_attr_st ...@@ -39,6 +40,46 @@ typedef struct xml_attr_st
} MY_XML_ATTR; } MY_XML_ATTR;
/*
XML ctype:
*/
#define MY_XML_ID0 0x01 /* Identifier initial character */
#define MY_XML_ID1 0x02 /* Identifier medial character */
#define MY_XML_SPC 0x08 /* Spacing character */
/*
http://www.w3.org/TR/REC-xml/
[4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
CombiningChar | Extender
[5] Name ::= (Letter | '_' | ':') (NameChar)*
*/
/*
  Per-byte character class table, indexed by the unsigned byte value
  (the my_xml_is_space/my_xml_is_id0/my_xml_is_id1 macros cast to uchar
  before indexing).

  Each entry is a bitwise OR of the flags MY_XML_ID0 (0x01),
  MY_XML_ID1 (0x02) and MY_XML_SPC (0x08):
    0 - none of the classes
    2 - allowed inside an identifier but not as its first character
        ('-', '.', and the digits '0'..'9')
    3 - allowed both as the first and as a later identifier character
        (ASCII letters, ':', '_', and every byte in 0x80..0xFF)
    8 - spacing character (tab, line feed, carriage return, space)
*/
static char my_xml_ctype[256]=
{
/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */
/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */
/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};
#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
static const char *lex2str(int lex) static const char *lex2str(int lex)
{ {
switch(lex) switch(lex)
...@@ -56,13 +97,13 @@ static const char *lex2str(int lex) ...@@ -56,13 +97,13 @@ static const char *lex2str(int lex)
case MY_XML_QUESTION: return "'?'"; case MY_XML_QUESTION: return "'?'";
case MY_XML_EXCLAM: return "'!'"; case MY_XML_EXCLAM: return "'!'";
} }
return "UNKNOWN"; return "unknown token";
} }
static void my_xml_norm_text(MY_XML_ATTR *a) static void my_xml_norm_text(MY_XML_ATTR *a)
{ {
for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ ); for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- ); for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
} }
...@@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) ...@@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{ {
int lex; int lex;
for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++); for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
if (p->cur >= p->end) if (p->cur >= p->end)
{ {
...@@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a) ...@@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
my_xml_norm_text(a); my_xml_norm_text(a);
lex=MY_XML_STRING; lex=MY_XML_STRING;
} }
else else if (my_xml_is_id0(p->cur[0]))
{ {
for(; p->cur++;
(p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]); while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
p->cur++) p->cur++;
{}
a->end=p->cur; a->end=p->cur;
my_xml_norm_text(a); my_xml_norm_text(a);
lex=MY_XML_IDENT; lex=MY_XML_IDENT;
} }
else
lex= MY_XML_UNKNOWN;
#if 0 #if 0
printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg); printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment