Commit 32ede45d authored by unknown's avatar unknown

Bug#20854 XML functions: wrong result in ExtractValue


mysql-test/r/xml.result:
  - Adding test case
  - Fixing error message
mysql-test/t/xml.test:
  Adding test case
sql/item_xmlfunc.cc:
  For grammar rules with loops like:
  
    AdditiveExpr ::= MultiplicativeExpr ('+' MultiplicativeExpr)*
  
  If we scanned '+' and then met an error when parsing
  MultiplicativeExpr, then we should fully stop parsing - without
  trying to apply any other rules.
  
  Fix: add an "error" member to the MY_XPATH structure,
  and make my_xpath_parse_term() never return success
  once the error flag is set.
strings/xml.c:
  Adding my_xml_ctype map for flags, indicating
  whether a character is a space character, is a
  valid identifier start character, is a valid
  identifier body character. Using this map to
  properly scan identifiers. Also, using this map
  to scan spaces faster (instead of strchr).
parent 9e89ea6f
......@@ -570,7 +570,7 @@ select extractvalue('<a>a<b>B</b></a>','a|/b');
extractvalue('<a>a<b>B</b></a>','a|/b')
a
select extractvalue('<a>A</a>','/<a>');
ERROR HY000: XPATH syntax error: '<a>'
ERROR HY000: XPATH error: comparison of two nodesets is not supported: '<a>'
select extractvalue('<a><b>b</b><b!>b!</b!></a>','//b!');
ERROR HY000: XPATH syntax error: '!'
select extractvalue('<a>A<b>B<c>C</c></b></a>','/a/descendant::*');
......@@ -710,3 +710,29 @@ Data
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something')
Otherdata
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
ERROR HY000: XPATH syntax error: '02'
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*')
NULL
Warnings:
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 13: unknown token unexpected (ident or '/' wanted)'
select extractValue('<.>test</.>','//*');
extractValue('<.>test</.>','//*')
NULL
Warnings:
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
select extractValue('<->test</->','//*');
extractValue('<->test</->','//*')
NULL
Warnings:
Warning 1512 Incorrect XML value: 'parse error at line 1 pos 2: unknown token unexpected (ident or '/' wanted)'
select extractValue('<:>test</:>','//*');
extractValue('<:>test</:>','//*')
test
select extractValue('<_>test</_>','//*');
extractValue('<_>test</_>','//*')
test
select extractValue('<x.-_:>test</x.-_:>','//*');
extractValue('<x.-_:>test</x.-_:>','//*')
test
......@@ -360,3 +360,19 @@ select extractValue('<ns:element xmlns:ns="myns">a</ns:element>','/ns:element/@x
#
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/foo.bar');
select extractValue('<foo><foo.bar>Data</foo.bar><something>Otherdata</something></foo>','/foo/something');
#
# Bug#20854 XML functions: wrong result in ExtractValue
#
--error 1105
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','/zot/tim0/02');
select extractValue('<zot><tim0><01>10:39:15</01><02>140</02></tim0></zot>','//*');
# dot and dash are bad identifier start characters
select extractValue('<.>test</.>','//*');
select extractValue('<->test</->','//*');
# colon is a good identifier start character
select extractValue('<:>test</:>','//*');
# underscore is good identifier start character
select extractValue('<_>test</_>','//*');
# dot, dash, underscore and colon are good identifier middle characters
select extractValue('<x.-_:>test</x.-_:>','//*');
......@@ -105,6 +105,7 @@ typedef struct my_xpath_st
String *context_cache; /* last context provider */
String *pxml; /* Parsed XML, an array of MY_XML_NODE */
CHARSET_INFO *cs; /* character set/collation string comparison */
int error;
} MY_XPATH;
......@@ -913,7 +914,9 @@ static Item *eq_func_reverse(int oper, Item *a, Item *b)
RETURN
The newly created item.
*/
static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
static Item *create_comparator(MY_XPATH *xpath,
int oper, MY_XPATH_LEX *context,
Item *a, Item *b)
{
if (a->type() != Item::XPATH_NODESET &&
b->type() != Item::XPATH_NODESET)
......@@ -923,6 +926,13 @@ static Item *create_comparator(MY_XPATH *xpath, int oper, Item *a, Item *b)
else if (a->type() == Item::XPATH_NODESET &&
b->type() == Item::XPATH_NODESET)
{
uint len= context->end - context->beg;
set_if_bigger(len, 32);
my_printf_error(ER_UNKNOWN_ERROR,
"XPATH error: "
"comparison of two nodesets is not supported: '%.*s'",
MYF(0), len, context->beg);
return 0; // TODO: Comparison of two nodesets
}
else
......@@ -1430,7 +1440,7 @@ my_xpath_lex_scan(MY_XPATH *xpath,
static int
my_xpath_parse_term(MY_XPATH *xpath, int term)
{
if (xpath->lasttok.term == term)
if (xpath->lasttok.term == term && !xpath->error)
{
xpath->prevtok= xpath->lasttok;
my_xpath_lex_scan(xpath, &xpath->lasttok,
......@@ -1558,8 +1568,9 @@ static int my_xpath_parse_AbsoluteLocationPath(MY_XPATH *xpath)
return my_xpath_parse_RelativeLocationPath(xpath);
}
return my_xpath_parse_term(xpath, MY_XPATH_LEX_EOF) ||
my_xpath_parse_RelativeLocationPath(xpath);
my_xpath_parse_RelativeLocationPath(xpath);
return (xpath->error == 0);
}
......@@ -1596,7 +1607,10 @@ static int my_xpath_parse_RelativeLocationPath(MY_XPATH *xpath)
"*", 1,
xpath->pxml, 1);
if (!my_xpath_parse_Step(xpath))
{
xpath->error= 1;
return 0;
}
}
return 1;
}
......@@ -1633,10 +1647,16 @@ my_xpath_parse_AxisSpecifier_NodeTest_opt_Predicate_list(MY_XPATH *xpath)
xpath->context_cache= context_cache;
if(!my_xpath_parse_PredicateExpr(xpath))
{
xpath->error= 1;
return 0;
}
if (!my_xpath_parse_term(xpath, MY_XPATH_LEX_RB))
{
xpath->error= 1;
return 0;
}
xpath->item= nodeset2bool(xpath, xpath->item);
......@@ -1893,7 +1913,10 @@ static int my_xpath_parse_UnionExpr(MY_XPATH *xpath)
if (!my_xpath_parse_PathExpr(xpath)
|| xpath->item->type() != Item::XPATH_NODESET)
{
xpath->error= 1;
return 0;
}
xpath->item= new Item_nodeset_func_union(prev, xpath->item, xpath->pxml);
}
return 1;
......@@ -1929,6 +1952,7 @@ static int my_xpath_parse_PathExpr(MY_XPATH *xpath)
{
return my_xpath_parse_LocationPath(xpath) ||
my_xpath_parse_FilterExpr_opt_slashes_RelativeLocationPath(xpath);
}
......@@ -1975,7 +1999,10 @@ static int my_xpath_parse_OrExpr(MY_XPATH *xpath)
{
Item *prev= xpath->item;
if (!my_xpath_parse_AndExpr(xpath))
{
  /*
    Set the error flag before returning. When the assignment followed
    "return 0" it was unreachable, so a failed operand after "or" never
    set xpath->error, unlike the sibling expression parsers.
  */
  xpath->error= 1;
  return 0;
}
xpath->item= new Item_cond_or(nodeset2bool(xpath, prev),
nodeset2bool(xpath, xpath->item));
}
......@@ -2003,7 +2030,10 @@ static int my_xpath_parse_AndExpr(MY_XPATH *xpath)
{
Item *prev= xpath->item;
if (!my_xpath_parse_EqualityExpr(xpath))
{
xpath->error= 1;
return 0;
}
xpath->item= new Item_cond_and(nodeset2bool(xpath,prev),
nodeset2bool(xpath,xpath->item));
......@@ -2057,17 +2087,26 @@ static int my_xpath_parse_EqualityOperator(MY_XPATH *xpath)
}
static int my_xpath_parse_EqualityExpr(MY_XPATH *xpath)
{
MY_XPATH_LEX operator_context;
if (!my_xpath_parse_RelationalExpr(xpath))
return 0;
operator_context= xpath->lasttok;
while (my_xpath_parse_EqualityOperator(xpath))
{
Item *prev= xpath->item;
int oper= xpath->extra;
if (!my_xpath_parse_RelationalExpr(xpath))
{
xpath->error= 1;
return 0;
}
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
prev, xpath->item)))
return 0;
operator_context= xpath->lasttok;
}
return 1;
}
......@@ -2109,18 +2148,25 @@ static int my_xpath_parse_RelationalOperator(MY_XPATH *xpath)
}
static int my_xpath_parse_RelationalExpr(MY_XPATH *xpath)
{
MY_XPATH_LEX operator_context;
if (!my_xpath_parse_AdditiveExpr(xpath))
return 0;
operator_context= xpath->lasttok;
while (my_xpath_parse_RelationalOperator(xpath))
{
Item *prev= xpath->item;
int oper= xpath->extra;
if (!my_xpath_parse_AdditiveExpr(xpath))
{
xpath->error= 1;
return 0;
}
if (!(xpath->item= create_comparator(xpath, oper, prev, xpath->item)))
if (!(xpath->item= create_comparator(xpath, oper, &operator_context,
prev, xpath->item)))
return 0;
operator_context= xpath->lasttok;
}
return 1;
}
......@@ -2153,7 +2199,10 @@ static int my_xpath_parse_AdditiveExpr(MY_XPATH *xpath)
int oper= xpath->prevtok.term;
Item *prev= xpath->item;
if (!my_xpath_parse_MultiplicativeExpr(xpath))
{
xpath->error= 1;
return 0;
}
if (oper == MY_XPATH_LEX_PLUS)
xpath->item= new Item_func_plus(prev, xpath->item);
......@@ -2198,7 +2247,10 @@ static int my_xpath_parse_MultiplicativeExpr(MY_XPATH *xpath)
int oper= xpath->prevtok.term;
Item *prev= xpath->item;
if (!my_xpath_parse_UnaryExpr(xpath))
{
xpath->error= 1;
return 0;
}
switch (oper)
{
case MY_XPATH_LEX_ASTERISK:
......
......@@ -19,6 +19,7 @@
#include "my_xml.h"
#define MY_XML_UNKNOWN 'U'
#define MY_XML_EOF 'E'
#define MY_XML_STRING 'S'
#define MY_XML_IDENT 'I'
......@@ -39,6 +40,46 @@ typedef struct xml_attr_st
} MY_XML_ATTR;
/*
XML ctype:
*/
#define MY_XML_ID0 0x01 /* Identifier initial character */
#define MY_XML_ID1 0x02 /* Identifier medial character */
#define MY_XML_SPC 0x08 /* Spacing character */
/*
http://www.w3.org/TR/REC-xml/
[4] NameChar ::= Letter | Digit | '.' | '-' | '_' | ':' |
CombiningChar | Extender
[5] Name ::= (Letter | '_' | ':') (NameChar)*
*/
/*
  Character class table indexed by byte value (0..255).
  Each entry is a bit mask: 1 = may start an identifier,
  2 = may appear inside an identifier, 8 = spacing character.
  So 3 marks bytes valid anywhere in an identifier, 2 marks bytes valid
  only after the first character (digits, '-', '.'), 8 marks
  TAB, LF, CR and space, and 0 marks everything else.
  All bytes 0x80..0xFF are accepted as identifier characters.
  The table is only read, hence const.
*/
static const char my_xml_ctype[256]=
{
/*00*/ 0,0,0,0,0,0,0,0,0,8,8,0,0,8,0,0,
/*10*/ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
/*20*/ 8,0,0,0,0,0,0,0,0,0,0,0,0,2,2,0, /* !"#$%&'()*+,-./ */
/*30*/ 2,2,2,2,2,2,2,2,2,2,3,0,0,0,0,0, /* 0123456789:;<=>? */
/*40*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* @ABCDEFGHIJKLMNO */
/*50*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,3, /* PQRSTUVWXYZ[\]^_ */
/*60*/ 0,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3, /* `abcdefghijklmno */
/*70*/ 3,3,3,3,3,3,3,3,3,3,3,0,0,0,0,0, /* pqrstuvwxyz{|}~ */
/*80*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*90*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*A0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*B0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*C0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*D0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*E0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
/*F0*/ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3
};
#define my_xml_is_space(c) (my_xml_ctype[(uchar) (c)] & MY_XML_SPC)
#define my_xml_is_id0(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID0)
#define my_xml_is_id1(c) (my_xml_ctype[(uchar) (c)] & MY_XML_ID1)
static const char *lex2str(int lex)
{
switch(lex)
......@@ -56,13 +97,13 @@ static const char *lex2str(int lex)
case MY_XML_QUESTION: return "'?'";
case MY_XML_EXCLAM: return "'!'";
}
return "UNKNOWN";
return "unknown token";
}
static void my_xml_norm_text(MY_XML_ATTR *a)
{
for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->beg[0]) ; a->beg++ );
for ( ; (a->beg < a->end) && strchr(" \t\r\n",a->end[-1]) ; a->end-- );
for ( ; (a->beg < a->end) && my_xml_is_space(a->beg[0]) ; a->beg++ );
for ( ; (a->beg < a->end) && my_xml_is_space(a->end[-1]) ; a->end-- );
}
......@@ -70,7 +111,7 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
{
int lex;
for( ; ( p->cur < p->end) && strchr(" \t\r\n",p->cur[0]) ; p->cur++);
for( ; ( p->cur < p->end) && my_xml_is_space(p->cur[0]) ; p->cur++);
if (p->cur >= p->end)
{
......@@ -124,16 +165,17 @@ static int my_xml_scan(MY_XML_PARSER *p,MY_XML_ATTR *a)
my_xml_norm_text(a);
lex=MY_XML_STRING;
}
else
else if (my_xml_is_id0(p->cur[0]))
{
for(;
(p->cur < p->end) && !strchr("?'\"=/<> \t\r\n", p->cur[0]);
p->cur++)
{}
p->cur++;
while (p->cur < p->end && my_xml_is_id1(p->cur[0]))
p->cur++;
a->end=p->cur;
my_xml_norm_text(a);
lex=MY_XML_IDENT;
}
else
lex= MY_XML_UNKNOWN;
#if 0
printf("LEX=%s[%d]\n",lex2str(lex),a->end-a->beg);
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment