Commit f34debd9 authored by pekka@mysql.com

wl-1884 storing NULL in ordered index

parent 43efb3cc
......@@ -86,26 +86,25 @@ public:
/**
* Define bound on index key in range scan.
*
* Each index key can have not null lower and/or upper bound, or can
* be set equal to not null value. The bounds can be defined in any
* order but a duplicate definition is an error.
* Each index key can have a lower and/or upper bound, or can be set
* equal to a value. The bounds can be defined in any order, but
* a duplicate definition is an error.
*
* The scan is most effective when bounds are given for an initial
* sequence of non-nullable index keys, and all but the last one is an
* equality. In this case the scan returns a contiguous range from
* each ordered index fragment.
* The bounds must specify a single range i.e. they are on an initial
* sequence of index keys and the condition is equality for all but
* (at most) the last key which has a lower and/or upper bound.
*
* @note This release implements only the case described above,
* except for the non-nullable limitation. Other sets of
* bounds return error or empty result set.
* NULL is treated like a normal value which is less than any not-NULL
* value and equal to another NULL value. To search for NULL, call
* setBound with a null pointer (0).
*
* @note In this release a null key value satisfies any lower
* bound and no upper bound. This may change.
* An index also stores all-NULL keys (this may become optional).
* An index scan with an empty bound set returns all table tuples.
*
* @param attrName Attribute name, alternatively:
* @param anAttrId Index column id (starting from 0).
* @param anAttrId Index column id (starting from 0)
* @param type Type of bound
* @param value Pointer to bound value
* @param value Pointer to bound value, 0 for NULL
* @param len Value length in bytes.
* Fixed per datatype and can be omitted
* @return 0 if successful otherwise -1
......
......@@ -446,6 +446,7 @@ private:
Uint32 m_descPage; // descriptor page
Uint16 m_descOff; // offset within the page
Uint16 m_numAttrs;
bool m_storeNullKey;
union {
Uint32 nextPool;
};
......@@ -469,6 +470,7 @@ private:
Uint32 m_descPage; // copy from index level
Uint16 m_descOff;
Uint16 m_numAttrs;
bool m_storeNullKey;
TreeHead m_tree;
TupLoc m_freeLoc; // one node pre-allocated for insert
DLList<ScanOp> m_scanList; // current scans on this fragment
......@@ -993,7 +995,8 @@ Dbtux::Index::Index() :
m_numFrags(0),
m_descPage(RNIL),
m_descOff(0),
m_numAttrs(0)
m_numAttrs(0),
m_storeNullKey(false)
{
for (unsigned i = 0; i < MaxIndexFragments; i++) {
m_fragId[i] = ZNIL;
......@@ -1012,6 +1015,7 @@ Dbtux::Frag::Frag(ArrayPool<ScanOp>& scanOpPool) :
m_descPage(RNIL),
m_descOff(0),
m_numAttrs(ZNIL),
m_storeNullKey(false),
m_tree(),
m_freeLoc(),
m_scanList(scanOpPool),
......
......@@ -62,15 +62,15 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, Cons
}
} else {
jam();
// not NULL < NULL
ret = -1;
// not NULL > NULL
ret = +1;
break;
}
} else {
if (! entryData.ah().isNULL()) {
jam();
// NULL > not NULL
ret = +1;
// NULL < not NULL
ret = -1;
break;
}
}
......@@ -116,15 +116,15 @@ Dbtux::cmpSearchKey(const Frag& frag, unsigned& start, TableData searchKey, Tabl
}
} else {
jam();
// not NULL < NULL
ret = -1;
// not NULL > NULL
ret = +1;
break;
}
} else {
if (*entryKey != 0) {
jam();
// NULL > not NULL
ret = +1;
// NULL < not NULL
ret = -1;
break;
}
}
......@@ -180,36 +180,41 @@ Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigne
// get and skip bound type
type = boundInfo[0];
boundInfo += 1;
ndbrequire(! boundInfo.ah().isNULL());
if (! entryData.ah().isNULL()) {
jam();
// current attribute
const unsigned index = boundInfo.ah().getAttributeId();
const DescAttr& descAttr = descEnt.m_descAttr[index];
const unsigned typeId = descAttr.m_typeId;
ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId);
// full data size
const unsigned size1 = boundInfo.ah().getDataSize();
ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize());
const unsigned size2 = min(size1, len2);
len2 -= size2;
// compare
const Uint32* const p1 = &boundInfo[AttributeHeaderSize];
const Uint32* const p2 = &entryData[AttributeHeaderSize];
int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2);
// XXX until data format errors are handled
ndbrequire(ret != NdbSqlUtil::CmpError);
if (ret != 0) {
if (! boundInfo.ah().isNULL()) {
if (! entryData.ah().isNULL()) {
jam();
// current attribute
const unsigned index = boundInfo.ah().getAttributeId();
const DescAttr& descAttr = descEnt.m_descAttr[index];
const unsigned typeId = descAttr.m_typeId;
ndbrequire(entryData.ah().getAttributeId() == descAttr.m_primaryAttrId);
// full data size
const unsigned size1 = boundInfo.ah().getDataSize();
ndbrequire(size1 != 0 && size1 == entryData.ah().getDataSize());
const unsigned size2 = min(size1, len2);
len2 -= size2;
// compare
const Uint32* const p1 = &boundInfo[AttributeHeaderSize];
const Uint32* const p2 = &entryData[AttributeHeaderSize];
int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size2);
// XXX until data format errors are handled
ndbrequire(ret != NdbSqlUtil::CmpError);
if (ret != 0) {
jam();
return ret;
}
} else {
jam();
return ret;
// not NULL > NULL
return +1;
}
} else {
jam();
/*
* NULL is bigger than any bound, thus the boundary is always to
* the left of NULL.
*/
return -1;
if (! entryData.ah().isNULL()) {
jam();
// NULL < not NULL
return -1;
}
}
boundInfo += AttributeHeaderSize + boundInfo.ah().getDataSize();
entryData += AttributeHeaderSize + entryData.ah().getDataSize();
......@@ -258,32 +263,37 @@ Dbtux::cmpScanBound(const Frag& frag, unsigned dir, ConstData boundInfo, unsigne
// get and skip bound type
type = boundInfo[0];
boundInfo += 1;
ndbrequire(! boundInfo.ah().isNULL());
if (*entryKey != 0) {
jam();
// current attribute
const unsigned index = boundInfo.ah().getAttributeId();
const DescAttr& descAttr = descEnt.m_descAttr[index];
const unsigned typeId = descAttr.m_typeId;
// full data size
const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc);
// compare
const Uint32* const p1 = &boundInfo[AttributeHeaderSize];
const Uint32* const p2 = *entryKey;
int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size1);
// XXX until data format errors are handled
ndbrequire(ret != NdbSqlUtil::CmpError);
if (ret != 0) {
if (! boundInfo.ah().isNULL()) {
if (*entryKey != 0) {
jam();
// current attribute
const unsigned index = boundInfo.ah().getAttributeId();
const DescAttr& descAttr = descEnt.m_descAttr[index];
const unsigned typeId = descAttr.m_typeId;
// full data size
const unsigned size1 = AttributeDescriptor::getSizeInWords(descAttr.m_attrDesc);
// compare
const Uint32* const p1 = &boundInfo[AttributeHeaderSize];
const Uint32* const p2 = *entryKey;
int ret = NdbSqlUtil::cmp(typeId, p1, p2, size1, size1);
// XXX until data format errors are handled
ndbrequire(ret != NdbSqlUtil::CmpError);
if (ret != 0) {
jam();
return ret;
}
} else {
jam();
return ret;
// not NULL > NULL
return +1;
}
} else {
jam();
/*
* NULL is bigger than any bound, thus the boundary is always to
* the left of NULL.
*/
return -1;
if (*entryKey != 0) {
jam();
// NULL < not NULL
return -1;
}
}
boundInfo += AttributeHeaderSize + boundInfo.ah().getDataSize();
entryKey += 1;
......
......@@ -82,8 +82,8 @@ Dbtux::execTUX_MAINT_REQ(Signal* signal)
ent.m_fragBit = fragBit;
// read search key
readKeyAttrs(frag, ent, 0, c_searchKey);
// check if all keys are null
{
if (! frag.m_storeNullKey) {
// check if all keys are null
const unsigned numAttrs = frag.m_numAttrs;
bool allNull = true;
for (unsigned i = 0; i < numAttrs; i++) {
......
......@@ -85,6 +85,7 @@ Dbtux::execTUXFRAGREQ(Signal* signal)
fragPtr.p->m_fragOff = req->fragOff;
fragPtr.p->m_fragId = req->fragId;
fragPtr.p->m_numAttrs = req->noOfAttr;
fragPtr.p->m_storeNullKey = true; // not yet configurable
fragPtr.p->m_tupIndexFragPtrI = req->tupIndexFragPtrI;
fragPtr.p->m_tupTableFragPtrI[0] = req->tupTableFragPtrI[0];
fragPtr.p->m_tupTableFragPtrI[1] = req->tupTableFragPtrI[1];
......@@ -111,6 +112,7 @@ Dbtux::execTUXFRAGREQ(Signal* signal)
indexPtr.p->m_tableId = req->primaryTableId;
indexPtr.p->m_fragOff = req->fragOff;
indexPtr.p->m_numAttrs = req->noOfAttr;
indexPtr.p->m_storeNullKey = true; // not yet configurable
// allocate attribute descriptors
if (! allocDescEnt(indexPtr)) {
jam();
......
......@@ -137,7 +137,7 @@ Dbtux::execTUX_BOUND_INFO(Signal* signal)
const Uint32* const data = (Uint32*)sig + TuxBoundInfo::SignalLength;
unsigned offset = 5;
// walk through entries
while (offset + 2 < req->boundAiLength) {
while (offset + 2 <= req->boundAiLength) {
jam();
const unsigned type = data[offset];
if (type > 4) {
......
......@@ -21,11 +21,11 @@ shows ms / 1000 rows for each and pct overhead
c
1 million rows, index on PK, full table scan, full index scan
shows ms / 1000 rows for each and index time pct
shows ms / 1000 rows for each and index time overhead
d
1 million rows, index on PK, read table via each pk, scan index for each pk
shows ms / 1000 rows for each and index time pct
shows ms / 1000 rows for each and index time overhead
samples 10% of all PKs (100,000 pk reads, 100,000 scans)
040616 mc02/a 40 ms 87 ms 114 pct
......@@ -66,12 +66,20 @@ optim 11 mc02/a 43 ms 63 ms 46 pct
optim 12 mc02/a 38 ms 55 ms 43 pct
mc02/b 47 ms 77 ms 63 pct
mc02/c 10 ms 14 ms 147 pct
mc02/d 176 ms 281 ms 159 pct
mc02/c 10 ms 14 ms 47 pct
mc02/d 176 ms 281 ms 59 pct
optim 13 mc02/a 40 ms 57 ms 42 pct
mc02/b 47 ms 77 ms 61 pct
mc02/c 9 ms 13 ms 150 pct
mc02/d 170 ms 256 ms 150 pct
mc02/c 9 ms 13 ms 50 pct
mc02/d 170 ms 256 ms 50 pct
after wl-1884 store all-NULL keys (the tests have pctnull=10 per column)
[ what happened to PK read performance? ]
optim 13 mc02/a 39 ms 59 ms 50 pct
mc02/b 47 ms 77 ms 61 pct
mc02/c 9 ms 12 ms 44 pct
mc02/d 246 ms 289 ms 17 pct
vim: set et:
......@@ -1125,7 +1125,6 @@ NdbIndexScanOperation::setBound(const NdbColumnImpl* tAttrInfo,
if (theOperationType == OpenRangeScanRequest &&
theStatus == SetBound &&
(0 <= type && type <= 4) &&
aValue != NULL &&
len <= 8000) {
// bound type
......@@ -1136,20 +1135,22 @@ NdbIndexScanOperation::setBound(const NdbColumnImpl* tAttrInfo,
setErrorCodeAbort(4209);
return -1;
}
len = sizeInBytes;
len = aValue != NULL ? sizeInBytes : 0;
Uint32 tIndexAttrId = tAttrInfo->m_attrId;
Uint32 sizeInWords = (len + 3) / 4;
AttributeHeader ah(tIndexAttrId, sizeInWords);
insertATTRINFO(ah.m_value);
// attribute data
if ((UintPtr(aValue) & 0x3) == 0 && (len & 0x3) == 0)
insertATTRINFOloop((const Uint32*)aValue, sizeInWords);
else {
Uint32 temp[2000];
memcpy(temp, aValue, len);
while ((len & 0x3) != 0)
((char*)temp)[len++] = 0;
insertATTRINFOloop(temp, sizeInWords);
if (len != 0) {
// attribute data
if ((UintPtr(aValue) & 0x3) == 0 && (len & 0x3) == 0)
insertATTRINFOloop((const Uint32*)aValue, sizeInWords);
else {
Uint32 temp[2000];
memcpy(temp, aValue, len);
while ((len & 0x3) != 0)
((char*)temp)[len++] = 0;
insertATTRINFOloop(temp, sizeInWords);
}
}
/**
......@@ -1236,7 +1237,7 @@ NdbIndexScanOperation::compare(Uint32 skip, Uint32 cols,
Uint32 * d2 = (Uint32*)r2->aRef();
unsigned r1_null = r1->isNULL();
if((r1_null ^ (unsigned)r2->isNULL())){
return (r1_null ? 1 : -1);
return (r1_null ? -1 : 1);
}
Uint32 type = NdbColumnImpl::getImpl(* r1->m_column).m_extType;
Uint32 size = (r1->theAttrSize * r1->theArraySize + 3) / 4;
......
......@@ -85,7 +85,7 @@ printhelp()
<< " -dups allow duplicate tuples from index scan [" << d.m_dups << "]" << endl
<< " -fragtype T fragment type single/small/medium/large" << endl
<< " -index xyz only given index numbers (digits 1-9)" << endl
<< " -loop N loop count full suite forever=0 [" << d.m_loop << "]" << endl
<< " -loop N loop count full suite 0=forever [" << d.m_loop << "]" << endl
<< " -nologging create tables in no-logging mode" << endl
<< " -rows N rows per thread [" << d.m_rows << "]" << endl
<< " -samples N samples for some timings (0=all) [" << d.m_samples << "]" << endl
......@@ -102,6 +102,12 @@ printhelp()
printtables();
}
// not yet configurable
static const bool g_store_null_key = true;
// compare NULL like normal value (NULL < not NULL, NULL == NULL)
static const bool g_compare_null = true;
// log and error macros
static NdbMutex ndbout_mutex = NDB_MUTEX_INITIALIZER;
......@@ -306,8 +312,8 @@ Tmr::pct(const Tmr& t1)
const char*
Tmr::over(const Tmr& t1)
{
if (0 < t1.m_ms && t1.m_ms < m_ms) {
sprintf(m_text, "%u pct", (100 * (m_ms - t1.m_ms)) / t1.m_ms);
if (0 < t1.m_ms) {
sprintf(m_text, "%d pct", (100 * (m_ms - t1.m_ms)) / t1.m_ms);
} else {
sprintf(m_text, "[cannot measure]");
}
......@@ -1168,9 +1174,9 @@ Val::cmp(const Val& val2) const
assert(col.m_type == col2.m_type && col.m_length == col2.m_length);
if (m_null || val2.m_null) {
if (! m_null)
return -1;
if (! val2.m_null)
return +1;
if (! val2.m_null)
return -1;
return 0;
}
// verify data formats
......@@ -1695,8 +1701,8 @@ int
BVal::setbnd(Par par) const
{
Con& con = par.con();
const char* addr = (const char*)dataaddr();
assert(! m_null);
assert(g_compare_null || ! m_null);
const char* addr = ! m_null ? (const char*)dataaddr() : 0;
const ICol& icol = m_icol;
CHK(con.setBound(icol.m_num, m_type, addr) == 0);
return 0;
......@@ -1785,7 +1791,8 @@ BSet::calc(Par par)
if (k + 1 < itab.m_icols)
bval.m_type = 4;
// value generation parameters
par.m_pctnull = 0;
if (! g_compare_null)
par.m_pctnull = 0;
par.m_pctrange = 50; // bit higher
do {
bval.calc(par, 0);
......@@ -1842,18 +1849,20 @@ BSet::filter(const Set& set, Set& set2) const
if (! set.exist(i))
continue;
const Row& row = *set.m_row[i];
bool ok1 = false;
for (unsigned k = 0; k < itab.m_icols; k++) {
const ICol& icol = itab.m_icol[k];
const Col& col = icol.m_col;
const Val& val = *row.m_val[col.m_num];
if (! val.m_null) {
ok1 = true;
break;
if (! g_store_null_key) {
bool ok1 = false;
for (unsigned k = 0; k < itab.m_icols; k++) {
const ICol& icol = itab.m_icol[k];
const Col& col = icol.m_col;
const Val& val = *row.m_val[col.m_num];
if (! val.m_null) {
ok1 = true;
break;
}
}
if (! ok1)
continue;
}
if (! ok1)
continue;
bool ok2 = true;
for (unsigned j = 0; j < m_bvals; j++) {
const BVal& bval = *m_bval[j];
......@@ -2727,13 +2736,13 @@ tpkops(Par par)
RUNSTEP(par, pkinsert, MT);
RUNSTEP(par, createindex, ST);
RUNSTEP(par, invalidateindex, MT);
RUNSTEP(par, readverify, MT);
RUNSTEP(par, readverify, ST);
for (unsigned i = 0; i < par.m_subloop; i++) {
RUNSTEP(par, pkupdatescanread, MT);
RUNSTEP(par, readverify, MT);
RUNSTEP(par, readverify, ST);
}
RUNSTEP(par, pkdelete, MT);
RUNSTEP(par, readverify, MT);
RUNSTEP(par, readverify, ST);
return 0;
}
......@@ -2746,10 +2755,10 @@ tmixedops(Par par)
RUNSTEP(par, pkinsert, MT);
RUNSTEP(par, createindex, ST);
RUNSTEP(par, invalidateindex, MT);
RUNSTEP(par, readverify, MT);
RUNSTEP(par, readverify, ST);
for (unsigned i = 0; i < par.m_subloop; i++) {
RUNSTEP(par, mixedoperations, MT);
RUNSTEP(par, readverify, MT);
RUNSTEP(par, readverify, ST);
}
return 0;
}
......@@ -2832,7 +2841,7 @@ ttimescan(Par par)
}
LL1("full scan table - " << t1.time());
LL1("full scan PK index - " << t2.time());
LL1("index time pct - " << t2.pct(t1));
LL1("overhead - " << t2.over(t1));
return 0;
}
......@@ -2854,7 +2863,7 @@ ttimepkread(Par par)
}
LL1("pk read table - " << t1.time());
LL1("pk read PK index - " << t2.time());
LL1("index time pct - " << t2.pct(t1));
LL1("overhead - " << t2.over(t1));
return 0;
}
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment