Commit f41aadfa authored by Alexander Barkov

Adding DATA_CHARSET table option.

parent 8e1c1949
......@@ -271,6 +271,28 @@ DROP TABLE t1;
CREATE TABLE t1
(
c CHAR(16)
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='latin1.xml'
OPTION_LIST='xmlsup=libxml2'
DATA_CHARSET=latin1;
ERROR HY000: DATA_CHARSET='latin1' is not supported for TABLE_TYPE=XML
CREATE TABLE t1
(
c CHAR(16)
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='latin1.xml'
OPTION_LIST='xmlsup=libxml2'
DATA_CHARSET=utf8;
SHOW CREATE TABLE t1;
Table t1
Create Table CREATE TABLE `t1` (
`c` char(16) DEFAULT NULL
) ENGINE=CONNECT DEFAULT CHARSET=latin1 `TABLE_TYPE`=XML `FILE_NAME`='latin1.xml' `OPTION_LIST`='xmlsup=libxml2' `DATA_CHARSET`=utf8
SELECT c, HEX(c) FROM t1;
c ÁÂÃÄÅÆÇ
HEX(c) C1C2C3C4C5C6C7
DROP TABLE t1;
CREATE TABLE t1
(
c CHAR(16)
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='latin1.xml'
OPTION_LIST='xmlsup=libxml2';
SELECT c, HEX(c) FROM t1;
......@@ -301,5 +323,39 @@ HEX(c) 3F3F3F3F3F3F3F
Warnings:
Level Warning
Code 1366
Message Incorrect string value: '\xC1\xC2\xC3\xC4\xC5\xC6...' for column 'c' at row 1
Message Incorrect string value: '\xC3\x81\xC3\x82\xC3\x83...' for column 'c' at row 1
DROP TABLE t1;
#
# Testing Cyrillic
#
CREATE TABLE t1
(
c CHAR(16) CHARACTER SET utf8
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='cp1251.xml'
OPTION_LIST='xmlsup=libxml2,rownode=b';
SELECT * FROM t1;
c БВГДЕЖЗ
INSERT INTO t1 VALUES ('ИКЛМН');
SELECT c, HEX(c) FROM t1;
c БВГДЕЖЗ
HEX(c) D091D092D093D094D095D096D097
c ИКЛМН
HEX(c) D098D09AD09BD09CD09D
DROP TABLE t1;
CREATE TABLE t1
(
c CHAR(16) CHARACTER SET cp1251
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='cp1251.xml'
OPTION_LIST='xmlsup=libxml2,rownode=b';
SELECT * FROM t1;
c БВГДЕЖЗ
c ИКЛМН
INSERT INTO t1 VALUES ('ОПРСТ');
SELECT c, HEX(c) FROM t1;
c БВГДЕЖЗ
HEX(c) C1C2C3C4C5C6C7
c ИКЛМН
HEX(c) C8CACBCCCD
c ОПРСТ
HEX(c) CECFD0D1D2
DROP TABLE t1;
<?xml version="1.0" encoding="utf-8"?>
<?xml version="1.0" encoding="cp1251"?>
<a><b><c></c></b></a>
......@@ -199,6 +199,25 @@ DROP TABLE t1;
--echo #
--echo # Testing character sets
--echo #
# A DATA_CHARSET other than utf8 must be rejected for TABLE_TYPE=XML
# (the engine reports it through ER_UNKNOWN_ERROR with a custom message).
--error ER_UNKNOWN_ERROR
CREATE TABLE t1
(
c CHAR(16)
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='latin1.xml'
OPTION_LIST='xmlsup=libxml2'
DATA_CHARSET=latin1;
# DATA_CHARSET=utf8 is accepted; SHOW CREATE TABLE is expected to list the
# option, and the SELECT checks the bytes returned for the column.
CREATE TABLE t1
(
c CHAR(16)
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='latin1.xml'
OPTION_LIST='xmlsup=libxml2'
DATA_CHARSET=utf8;
SHOW CREATE TABLE t1;
SELECT c, HEX(c) FROM t1;
DROP TABLE t1;
CREATE TABLE t1
(
c CHAR(16)
......@@ -229,15 +248,27 @@ SELECT c, HEX(c) FROM t1;
DROP TABLE t1;
#
# TODO: Cyrillic does not work
#
#CREATE TABLE t1
#(
# c CHAR(16) CHARACTER SET utf8
#) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='cp1251.xml';
#SELECT c, HEX(c) FROM t1;
#DROP TABLE t1;
--echo #
--echo # Testing Cyrillic
--echo #
# Column declared as utf8: per the recorded result, HEX() shows two-byte
# UTF-8 sequences (D0xx) for the Cyrillic letters read from cp1251.xml.
CREATE TABLE t1
(
c CHAR(16) CHARACTER SET utf8
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='cp1251.xml'
OPTION_LIST='xmlsup=libxml2,rownode=b';
SELECT * FROM t1;
INSERT INTO t1 VALUES ('ИКЛМН');
SELECT c, HEX(c) FROM t1;
DROP TABLE t1;
# Same file through a cp1251 column: the row inserted above is expected to
# still be present, and HEX() to show single-byte cp1251 codes.
CREATE TABLE t1
(
c CHAR(16) CHARACTER SET cp1251
) ENGINE=CONNECT TABLE_TYPE=XML FILE_NAME='cp1251.xml'
OPTION_LIST='xmlsup=libxml2,rownode=b';
SELECT * FROM t1;
INSERT INTO t1 VALUES ('ОПРСТ');
SELECT c, HEX(c) FROM t1;
DROP TABLE t1;
#
......
......@@ -207,6 +207,7 @@ struct ha_table_option_struct {
const char *subtype;
const char *catfunc;
const char *oplist;
const char *data_charset;
int lrecl;
int elements;
//int estimate;
......@@ -238,6 +239,7 @@ ha_create_table_option connect_table_option_list[]=
HA_TOPTION_STRING("SUBTYPE", subtype),
HA_TOPTION_STRING("CATFUNC", catfunc),
HA_TOPTION_STRING("OPTION_LIST", oplist),
HA_TOPTION_STRING("DATA_CHARSET", data_charset),
HA_TOPTION_NUMBER("LRECL", lrecl, 0, 0, INT_MAX32, 1),
HA_TOPTION_NUMBER("BLOCK_SIZE", elements, 0, 0, INT_MAX32, 1),
//HA_TOPTION_NUMBER("ESTIMATE", estimate, 0, 0, INT_MAX32, 1),
......@@ -821,6 +823,8 @@ char *ha_connect::GetStringOption(char *opname, char *sdef)
opval= (char*)options->subtype;
else if (!stricmp(opname, "Catfunc"))
opval= (char*)options->catfunc;
else if (!stricmp(opname, "Data_charset"))
opval= (char*)options->data_charset;
if (!opval && options->oplist)
opval= GetListOption(opname, options->oplist);
......@@ -1398,7 +1402,7 @@ int ha_connect::MakeRecord(char *buf)
Field* *field;
Field *fp;
my_bitmap_map *org_bitmap;
CHARSET_INFO *charset;
CHARSET_INFO *charset= tdbp->data_charset();
const MY_BITMAP *map;
PVAL value;
PCOL colp= NULL;
......@@ -1429,9 +1433,6 @@ int ha_connect::MakeRecord(char *buf)
for (field= table->field; *field && !rc; field++) {
fp= *field;
// Default charset
charset= table->s->table_charset;
#if defined(MARIADB)
if (fp->vcol_info && !fp->stored_in_db)
continue; // This is a virtual column
......@@ -1476,7 +1477,6 @@ int ha_connect::MakeRecord(char *buf)
p= NULL;
break;
case TYPE_STRING:
charset= fp->charset();
// Passthru
default:
p= value->GetCharString(val);
......@@ -1513,6 +1513,7 @@ int ha_connect::MakeRecord(char *buf)
int ha_connect::ScanRecord(PGLOBAL g, uchar *buf)
{
char attr_buffer[1024];
char data_buffer[1024];
int rc= 0;
PCOL colp;
PVAL value;
......@@ -1521,6 +1522,8 @@ int ha_connect::ScanRecord(PGLOBAL g, uchar *buf)
String attribute(attr_buffer, sizeof(attr_buffer),
table->s->table_charset);
my_bitmap_map *bmap= dbug_tmp_use_all_columns(table, table->read_set);
const CHARSET_INFO *charset= tdbp->data_charset();
String data_charset_value(data_buffer, sizeof(data_buffer), charset);
// Scan the pseudo record for field values and set column values
for (Field **field=table->field ; *field ; field++) {
......@@ -1564,7 +1567,18 @@ int ha_connect::ScanRecord(PGLOBAL g, uchar *buf)
break;
default:
fp->val_str(&attribute);
if (charset == &my_charset_bin)
{
value->SetValue_psz(attribute.c_ptr());
}
else
{
// Convert from SQL field charset to DATA_CHARSET
uint cnv_errors;
data_charset_value.copy(attribute.ptr(), attribute.length(),
attribute.charset(), charset, &cnv_errors);
value->SetValue_psz(data_charset_value.c_ptr());
}
} // endswitch Type
#ifdef NEWCHANGE
......@@ -3639,6 +3653,7 @@ int ha_connect::create(const char *name, TABLE *table_arg,
TABLE *st= table; // Probably unuseful
PIXDEF xdp, pxd= NULL, toidx= NULL;
PGLOBAL g= GetPlug(table_arg->in_use);
const CHARSET_INFO *data_charset;
DBUG_ENTER("ha_connect::create");
PTOS options= GetTableOptionStruct(table_arg);
......@@ -3646,6 +3661,30 @@ int ha_connect::create(const char *name, TABLE *table_arg,
// CONNECT engine specific table options:
DBUG_ASSERT(options);
if (options->data_charset)
{
if (!(data_charset= get_charset_by_csname(options->data_charset,
MY_CS_PRIMARY, MYF(0))))
{
my_error(ER_UNKNOWN_CHARACTER_SET, MYF(0), options->data_charset);
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
}
if (GetTypeID(options->type) == TAB_XML &&
data_charset != &my_charset_utf8_general_ci)
{
my_printf_error(ER_UNKNOWN_ERROR,
"DATA_CHARSET='%s' is not supported for TABLE_TYPE=XML",
MYF(0), options->data_charset);
DBUG_RETURN(HA_ERR_INTERNAL_ERROR);
}
}
else
{
data_charset= create_info->default_table_charset ?
create_info->default_table_charset :
&my_charset_latin1;
}
if (!g) {
rc= HA_ERR_INTERNAL_ERROR;
DBUG_RETURN(rc);
......
......@@ -375,7 +375,7 @@ int LIBXMLDOC::Decode(xmlChar *cnt, char *buf, int n)
{
const char *txt = (const char *)cnt;
uint dummy_errors;
uint32 len= copy_and_convert(buf, n, &my_charset_latin1, txt,
uint32 len= copy_and_convert(buf, n, &my_charset_utf8_general_ci, txt,
strlen(txt), &my_charset_utf8_general_ci,
&dummy_errors);
buf[len]= '\0';
......@@ -387,7 +387,7 @@ int LIBXMLDOC::Decode(xmlChar *cnt, char *buf, int n)
/******************************************************************/
xmlChar *LIBXMLDOC::Encode(PGLOBAL g, char *txt)
{
const CHARSET_INFO *ics= &my_charset_latin1; // TODO: Field->charset()
const CHARSET_INFO *ics= &my_charset_utf8_general_ci;
const CHARSET_INFO *ocs= &my_charset_utf8_general_ci;
size_t i = strlen(txt);
size_t o = i * ocs->mbmaxlen / ics->mbmaxlen + 1;
......
......@@ -102,6 +102,10 @@ bool TABDEF::Define(PGLOBAL g, PCATLG cat, LPCSTR name, LPCSTR am)
Multiple = cat->GetIntCatInfo(name, "Multiple", 0);
Degree = cat->GetIntCatInfo(name, "Degree", 0);
Read_Only = cat->GetBoolCatInfo(name, "ReadOnly", false);
const char *data_charset_name= cat->GetStringCatInfo(g, Name, "Data_charset", NULL);
m_data_charset= data_charset_name ?
get_charset_by_csname(data_charset_name, MY_CS_PRIMARY, 0):
NULL;
// Get The column definitions
if ((poff = cat->GetColCatInfo(g, this)) < 0)
......
......@@ -11,6 +11,7 @@
#include "block.h"
#include "catalog.h"
#include "my_sys.h"
typedef class INDEXDEF *PIXDEF;
......@@ -75,6 +76,10 @@ class DllExport TABDEF : public RELDEF { /* Logical table descriptor */
virtual PIXDEF GetIndx(void) {return NULL;}
virtual void SetIndx(PIXDEF xp) {}
virtual bool IsHuge(void) {return false;}
// Accessor for the table's data character set (m_data_charset as stored).
const CHARSET_INFO *data_charset() {return m_data_charset;}
// Methods
bool DropTable(PGLOBAL g, PSZ name);
......@@ -93,6 +98,7 @@ class DllExport TABDEF : public RELDEF { /* Logical table descriptor */
int Degree; /* Number of columns in the table */
int Pseudo; /* Bit: 1 ROWID Ok, 2 FILEID Ok */
bool Read_Only; /* true for read only tables */
const CHARSET_INFO *m_data_charset;
}; // end of TABDEF
/***********************************************************************/
......
......@@ -242,6 +242,7 @@ TDBASE::TDBASE(PTABDEF tdp) : TDB(tdp)
MaxSize = -1;
Knum = 0;
Read_Only = (tdp) ? tdp->IsReadOnly() : false;
m_data_charset= (tdp) ? tdp->data_charset() : NULL;
} // end of TDBASE constructor
TDBASE::TDBASE(PTDBASE tdbp) : TDB(tdbp)
......@@ -250,6 +251,7 @@ TDBASE::TDBASE(PTDBASE tdbp) : TDB(tdbp)
To_SetCols = tdbp->To_SetCols; // ???
MaxSize = tdbp->MaxSize;
Read_Only = tdbp->Read_Only;
m_data_charset= tdbp->m_data_charset;
} // end of TDBASE copy constructor
/***********************************************************************/
......
......@@ -101,6 +101,10 @@ class DllExport TDBXML : public TDBASE {
virtual int DeleteDB(PGLOBAL g, int irc);
virtual void CloseDB(PGLOBAL g);
virtual int CheckWrite(PGLOBAL g) {Checked = true; return 0;}
// XML tables always report utf8_general_ci as their data character set.
virtual const CHARSET_INFO *data_charset() {return &my_charset_utf8_general_ci;}
protected:
// Members
......
......@@ -16,6 +16,7 @@
#include "assert.h"
#include "block.h"
#include "colblk.h"
#include "m_ctype.h"
//pedef class INDEXDEF *PIXDEF;
typedef char *PFIL; // Specific to CONNECT
......@@ -104,6 +105,7 @@ class DllExport TDB: public TBX { // Table Descriptor Block.
virtual int Cardinality(PGLOBAL g) {return (g) ? -1 : 0;}
virtual int RowNumber(PGLOBAL g, bool b = false);
virtual bool IsReadOnly(void) {return true;}
// Generic table descriptor: no data character set is known at this level.
virtual const CHARSET_INFO *data_charset()
{
  return NULL;
}
virtual PTDB Duplicate(PGLOBAL g) {return NULL;}
virtual PTDB CopyOne(PTABS t) {return this;}
virtual PTBX Copy(PTABS t);
......@@ -171,6 +173,15 @@ class DllExport TDBASE : public TDB {
virtual int GetRecpos(void) = 0;
virtual bool SetRecpos(PGLOBAL g, int recpos);
virtual bool IsReadOnly(void) {return Read_Only;}
/*
  Return the character set of the external data.

  If no DATA_CHARSET is specified (m_data_charset is NULL), we assume
  that the character set of the remote data is the same as the
  CHARACTER SET definition of the SQL column; this case is signalled
  by returning &my_charset_bin.

  The return type is const-qualified so that it agrees with the
  m_data_charset member and with the data_charset() declarations in
  TDB and TDBXML.
*/
virtual const CHARSET_INFO *data_charset()
{
  return m_data_charset ? m_data_charset : &my_charset_bin;
}
virtual int GetProgMax(PGLOBAL g) {return GetMaxSize(g);}
virtual int GetProgCur(void) {return GetRecpos();}
virtual PSZ GetFile(PGLOBAL g) {return "Not a file";}
......@@ -199,6 +210,7 @@ class DllExport TDBASE : public TDB {
int MaxSize; // Max size in number of lines
int Knum; // Size of key arrays
bool Read_Only; // True for read only tables
const CHARSET_INFO *m_data_charset;
}; // end of class TDBASE
/***********************************************************************/
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment