Commit 92bd6801 authored by Alexander Barkov's avatar Alexander Barkov

A joint patch for:

- MDEV-5689 ExtractValue(xml, 'substring(/x,/y)') crashes
- MDEV-5709 ExtractValue() with XPath variable references returns wrong result.

Description:

1. The main problem was that that nodeset_func->fix_fields() was
called in Item_func_xml_extractvalue::val_str() and
Item_func_xml_update::val_str(), which led in some cases to
execution of the XPath engine *before* having a parsed XML value.
Moved to Item_xml_str_func::fix_fields().

2. Cleanup: added a new method Item_xml_str_func::fix_fields() and moved
most of the code from Item_xml_str_func::fix_length_and_dec()
to Item_xml_str_func::fix_fields(), to follow the usual Item layout.

3. Cleanup: a parsed XML value is useless without the raw XML value
it was built from.

Previously the parsed and the raw values where stored in separate String
instances. It was hard to follow how they are synchronized.
Added a helper class XML which contains both parsed and raw values.
Makes things easier to read and modify.

4. MDEV-5709: const_item() could incorrectly return a "true"
result when XPath expression contains users/SP variable references.
Now nodeset_func->const_item() is also taken into account to
catch such cases.

5. Minor code enhancements.
parent e0f75b1b
......@@ -1214,3 +1214,41 @@ DROP TABLE t1;
#
# End of 5.5 tests
#
#
# Start of 10.0 tests
#
#
# MDEV-5689 ExtractValue(xml, 'substring(/x,/y)') crashes
#
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,..)') AS e;
e
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c)') AS e;
e
bc
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d)') AS e;
e
abc
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c,/a/d)') AS e;
e
b
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d,/a/c)') AS e;
e
ab
#
# MDEV-5709 ExtractValue() with XPath variable references returns wrong result
#
CREATE TABLE t1 (c1 INT, c2 VARCHAR(10));
INSERT INTO t1 VALUES (1,'b1'),(2,'b2');
SELECT *,IF(@i:=c1,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0) AS xpath FROM t1;
c1 c2 xpath
1 b1 b1
2 b2 b2
SELECT * FROM t1 WHERE c2=IF(@i:=c1,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0);
c1 c2
1 b1
2 b2
DROP TABLE t1;
#
# End of 10.0 tests
#
......@@ -707,6 +707,36 @@ INSERT INTO t1 VALUES (CONCAT('<a><', REPEAT('b',128),'>b128</',REPEAT('b',128),
SELECT ExtractValue (a, CONCAT('//',REPEAT('c',512))) AS c512 FROM t1;
DROP TABLE t1;
--horizontal_results
--echo #
--echo # End of 5.5 tests
--echo #
--echo #
--echo # Start of 10.0 tests
--echo #
--echo #
--echo # MDEV-5689 ExtractValue(xml, 'substring(/x,/y)') crashes
--echo #
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,..)') AS e;
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c)') AS e;
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d)') AS e;
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/c,/a/d)') AS e;
SELECT ExtractValue('<a><b>abc</b><c>2</c><d>1</d></a>','substring(/a/b,/a/d,/a/c)') AS e;
--echo #
--echo # MDEV-5709 ExtractValue() with XPath variable references returns wrong result
--echo #
CREATE TABLE t1 (c1 INT, c2 VARCHAR(10));
INSERT INTO t1 VALUES (1,'b1'),(2,'b2');
SELECT *,IF(@i:=c1,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0) AS xpath FROM t1;
SELECT * FROM t1 WHERE c2=IF(@i:=c1,ExtractValue('<a><b>b1</b><b>b2</b></a>','//b[$@i]'),0);
DROP TABLE t1;
--echo #
--echo # End of 10.0 tests
--echo #
......@@ -2599,17 +2599,25 @@ my_xpath_parse(MY_XPATH *xpath, const char *str, const char *strend)
void Item_xml_str_func::fix_length_and_dec()
{
max_length= MAX_BLOB_WIDTH;
agg_arg_charsets_for_comparison(collation, args, arg_count);
}
bool Item_xml_str_func::fix_fields(THD *thd, Item **ref)
{
String *xp, tmp;
MY_XPATH xpath;
int rc;
if (Item_str_func::fix_fields(thd, ref))
return true;
status_var_increment(current_thd->status_var.feature_xml);
nodeset_func= 0;
if (agg_arg_charsets_for_comparison(collation, args, arg_count))
return;
if (collation.collation->mbminlen > 1)
{
......@@ -2617,23 +2625,23 @@ void Item_xml_str_func::fix_length_and_dec()
my_printf_error(ER_UNKNOWN_ERROR,
"Character set '%s' is not supported by XPATH",
MYF(0), collation.collation->csname);
return;
return true;
}
if (!args[1]->const_item())
{
my_printf_error(ER_UNKNOWN_ERROR,
"Only constant XPATH queries are supported", MYF(0));
return;
return true;
}
if (!(xp= args[1]->val_str(&tmp)))
return;
return false; // Will return NULL
my_xpath_init(&xpath);
xpath.cs= collation.collation;
xpath.debug= 0;
xpath.pxml= &pxml;
pxml.set_charset(collation.collation);
xpath.pxml= xml.parsed();
xml.set_charset(collation.collation);
rc= my_xpath_parse(&xpath, xp->ptr(), xp->ptr() + xp->length());
......@@ -2643,13 +2651,24 @@ void Item_xml_str_func::fix_length_and_dec()
set_if_smaller(clen, 32);
my_printf_error(ER_UNKNOWN_ERROR, "XPATH syntax error: '%.*s'",
MYF(0), clen, xpath.lasttok.beg);
return;
return true;
}
nodeset_func= xpath.item;
if (nodeset_func)
nodeset_func->fix_fields(current_thd, &nodeset_func);
max_length= MAX_BLOB_WIDTH;
/*
Parsing XML is a heavy operation, so if the first argument is constant,
then parse XML only one time and cache the parsed representation
together with raw text representation.
Note, we cannot cache the entire function result even if
the first and the second arguments are constants, because
the XPath expression may have user and SP variable references,
so the function result can vary between executions.
*/
if ((args[0]->const_item() && get_xml(&xml, true)) ||
!(nodeset_func= xpath.item))
return false; // Will return NULL
return nodeset_func->fix_fields(thd, &nodeset_func);
}
......@@ -2780,25 +2799,24 @@ int xml_leave(MY_XML_PARSER *st,const char *attr, size_t len)
Parse raw XML
SYNOPSYS
RETURN
Currently pointer to parsed XML on success
0 on parse error
false on success
true on error
*/
String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
bool Item_xml_str_func::XML::parse()
{
MY_XML_PARSER p;
MY_XML_USER_DATA user_data;
int rc;
parsed_xml_buf->length(0);
m_parsed_buf.length(0);
/* Prepare XML parser */
my_xml_parser_create(&p);
p.flags= MY_XML_FLAG_RELATIVE_NAMES | MY_XML_FLAG_SKIP_TEXT_NORMALIZATION;
user_data.level= 0;
user_data.pxml= parsed_xml_buf;
user_data.pxml= &m_parsed_buf;
user_data.parent= 0;
my_xml_set_enter_handler(&p, xml_enter);
my_xml_set_value_handler(&p, xml_value);
......@@ -2807,10 +2825,10 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
/* Add root node */
p.current_node_type= MY_XML_NODE_TAG;
xml_enter(&p, raw_xml->ptr(), 0);
xml_enter(&p, m_raw_ptr->ptr(), 0);
/* Execute XML parser */
if ((rc= my_xml_parse(&p, raw_xml->ptr(), raw_xml->length())) != MY_XML_OK)
if ((rc= my_xml_parse(&p, m_raw_ptr->ptr(), m_raw_ptr->length())) != MY_XML_OK)
{
char buf[128];
my_snprintf(buf, sizeof(buf)-1, "parse error at line %d pos %lu: %s",
......@@ -2820,10 +2838,41 @@ String *Item_xml_str_func::parse_xml(String *raw_xml, String *parsed_xml_buf)
push_warning_printf(current_thd, Sql_condition::WARN_LEVEL_WARN,
ER_WRONG_VALUE,
ER(ER_WRONG_VALUE), "XML", buf);
m_raw_ptr= (String *) 0;
}
my_xml_parser_free(&p);
return rc == MY_XML_OK ? parsed_xml_buf : 0;
return rc != MY_XML_OK;
}
/*
Parse the raw XML from the given source,
optionally cache the raw XML,
remember the pointer to the raw XML.
*/
bool Item_xml_str_func::XML::parse(String *raw_xml, bool cache)
{
m_raw_ptr= raw_xml;
if (cache)
{
m_cached= true;
if (m_raw_ptr != &m_raw_buf && m_raw_buf.copy(*m_raw_ptr))
{
m_raw_ptr= (String *) 0;
return true;
}
m_raw_ptr= &m_raw_buf;
}
return parse();
}
const MY_XML_NODE *Item_xml_str_func::XML::node(uint idx)
{
const MY_XML_NODE *nodebeg= (MY_XML_NODE*) m_parsed_buf.ptr();
DBUG_ASSERT(idx < m_parsed_buf.length() / sizeof (MY_XML_NODE));
return nodebeg + idx;
}
......@@ -2831,10 +2880,8 @@ String *Item_func_xml_extractvalue::val_str(String *str)
{
String *res;
null_value= 0;
if (!nodeset_func ||
!(res= args[0]->val_str(str)) ||
!parse_xml(res, &pxml) ||
!(res= nodeset_func->val_str(&tmp_value)))
if (!nodeset_func || get_xml(&xml) ||
!(res= nodeset_func->val_str(str)))
{
null_value= 1;
return 0;
......@@ -2843,22 +2890,37 @@ String *Item_func_xml_extractvalue::val_str(String *str)
}
bool Item_func_xml_update::collect_result(String *str,
const MY_XML_NODE *cut,
const String *replace)
{
uint offs= cut->type == MY_XML_NODE_TAG ? 1 : 0;
const char *end= cut->tagend + offs;
str->length(0);
str->set_charset(collation.collation);
return
/* Put the XML part preceeding the replaced piece */
str->append(xml.raw()->ptr(), cut->beg - xml.raw()->ptr() - offs) ||
/* Put the replacement */
str->append(replace->ptr(), replace->length()) ||
/* Put the XML part following the replaced piece */
str->append(end, xml.raw()->ptr() + xml.raw()->length() - end);
}
String *Item_func_xml_update::val_str(String *str)
{
String *res, *nodeset, *rep;
String *nodeset, *rep;
null_value= 0;
if (!nodeset_func ||
!(res= args[0]->val_str(str)) ||
if (!nodeset_func || get_xml(&xml) ||
!(rep= args[2]->val_str(&tmp_value3)) ||
!parse_xml(res, &pxml) ||
!(nodeset= nodeset_func->val_nodeset(&tmp_value2)))
{
null_value= 1;
return 0;
}
MY_XML_NODE *nodebeg= (MY_XML_NODE*) pxml.ptr();
MY_XPATH_FLT *fltbeg= (MY_XPATH_FLT*) nodeset->ptr();
MY_XPATH_FLT *fltend= (MY_XPATH_FLT*) (nodeset->ptr() + nodeset->length());
......@@ -2866,10 +2928,10 @@ String *Item_func_xml_update::val_str(String *str)
if (fltend - fltbeg != 1)
{
/* TODO: perhaps add a warning that more than one tag selected */
return res;
return xml.raw();
}
nodebeg+= fltbeg->num;
const MY_XML_NODE *nodebeg= xml.node(fltbeg->num);
if (!nodebeg->level)
{
......@@ -2881,12 +2943,5 @@ String *Item_func_xml_update::val_str(String *str)
return rep;
}
tmp_value.length(0);
tmp_value.set_charset(collation.collation);
uint offs= nodebeg->type == MY_XML_NODE_TAG ? 1 : 0;
tmp_value.append(res->ptr(), nodebeg->beg - res->ptr() - offs);
tmp_value.append(rep->ptr(), rep->length());
const char *end= nodebeg->tagend + offs;
tmp_value.append(end, res->ptr() + res->length() - end);
return &tmp_value;
return collect_result(str, nodebeg, rep) ? (String *) NULL : str;
}
......@@ -26,11 +26,55 @@
#endif
typedef struct my_xml_node_st MY_XML_NODE;
class Item_xml_str_func: public Item_str_func
{
protected:
String tmp_value, pxml;
/*
A helper class to store raw and parsed XML.
*/
class XML
{
bool m_cached;
String *m_raw_ptr; // Pointer to text representation
String m_raw_buf; // Cached text representation
String m_parsed_buf; // Array of MY_XML_NODEs, pointing to raw_buffer
bool parse();
void reset()
{
m_cached= false;
m_raw_ptr= (String *) 0;
}
public:
XML() { reset(); }
void set_charset(CHARSET_INFO *cs) { m_parsed_buf.set_charset(cs); }
String *raw() { return m_raw_ptr; }
String *parsed() { return &m_parsed_buf; }
const MY_XML_NODE *node(uint idx);
bool cached() { return m_cached; }
bool parse(String *raw, bool cache);
bool parse(Item *item, bool cache)
{
String *res;
if (!(res= item->val_str(&m_raw_buf)))
{
m_raw_ptr= (String *) 0;
m_cached= cache;
return true;
}
return parse(res, cache);
}
};
Item *nodeset_func;
XML xml;
bool get_xml(XML *xml, bool cache= false)
{
if (!cache && xml->cached())
return xml->raw() == 0;
return xml->parse(args[0], cache);
}
public:
Item_xml_str_func(Item *a, Item *b):
Item_str_func(a,b)
......@@ -42,8 +86,12 @@ class Item_xml_str_func: public Item_str_func
{
maybe_null= TRUE;
}
bool fix_fields(THD *thd, Item **ref);
void fix_length_and_dec();
String *parse_xml(String *raw_xml, String *parsed_xml_buf);
bool const_item() const
{
return const_item_cache && (!nodeset_func || nodeset_func->const_item());
}
bool check_vcol_func_processor(uchar *int_arg)
{
return trace_unsupported_by_check_vcol_func_processor(func_name());
......@@ -63,6 +111,9 @@ class Item_func_xml_extractvalue: public Item_xml_str_func
class Item_func_xml_update: public Item_xml_str_func
{
String tmp_value2, tmp_value3;
bool collect_result(String *str,
const MY_XML_NODE *cut,
const String *replace);
public:
Item_func_xml_update(Item *a,Item *b,Item *c) :Item_xml_str_func(a,b,c) {}
const char *func_name() const { return "updatexml"; }
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment