ndb - super pool update (future use)

storage/ndb/src/kernel/vm/SuperPool.cpp: super pool update storage/ndb/src/kernel/vm/SuperPool.hpp: super pool update storage/ndb/src/kernel/vm/testSuperPool.cpp: super pool update

ndb - super pool update (future use)
storage/ndb/src/kernel/vm/SuperPool.cpp: super pool update storage/ndb/src/kernel/vm/SuperPool.hpp: super pool update storage/ndb/src/kernel/vm/testSuperPool.cpp: super pool update
e70d1493 · unknown · 835426ce · e70d1493 · e70d1493 · e70d1493
Commit e70d1493 authored Nov 08, 2005 by unknown
3 changed files
--- a/storage/ndb/src/kernel/vm/SuperPool.cpp
+++ b/storage/ndb/src/kernel/vm/SuperPool.cpp
@@ -17,28 +17,33 @@
 #include <ndb_global.h>
 #include "SuperPool.hpp"

+#define SP_ALIGN(sz, al) (((sz) + (al) - 1) & ~((al) - 1))
+
+// This is used for m_freeRecI when there is no record pool page.
+#define NNIL    0xffffffff
+
 // SuperPool

 SuperPool::SuperPool(Uint32 pageSize, Uint32 pageBits) :
-  m_pageSize(SP_ALIGN_SIZE(pageSize, SP_ALIGN)),
+  m_pageSize(pageSize),
  m_pageBits(pageBits),
+  m_recBits(32 - m_pageBits),
+  m_recMask((1 << m_recBits) - 1),
  m_memRoot(0),
  m_pageEnt(0),
-  m_typeCheck(0),
-  m_typeSeq(0),
-  m_pageList(),
-  m_totalSize(0),
-  m_initSize(0),
-  m_incrSize(0),
-  m_maxSize(0)
-{
-  assert(5 <= pageBits <= 30);
-}
-
-bool
-SuperPool::init()
+  m_pageType(0),
+  m_freeList(),
+  m_initPages(0),
+  m_incrPages(0),
+  m_maxPages(0),
+  m_totPages(0),
+  m_typeCount(0),
+  m_groupMinPct(0),
+  m_groupMinPages(0),
+  m_groupTotPages(0)
 {
-  return true;
+  assert(m_pageSize != 0 && (m_pageSize & (m_pageSize - 1)) == 0);
+  assert(m_pageBits <= 30);
 }

 SuperPool::~SuperPool()
@@ -47,13 +52,15 @@ SuperPool::~SuperPool()

 SuperPool::PageEnt::PageEnt() :
  m_pageType(0),
-  m_freeRecI(RNIL),
  m_useCount(0),
+  m_freeRecI(NNIL),
  m_nextPageI(RNIL),
  m_prevPageI(RNIL)
 {
 }

+// page list routines
+
 SuperPool::PageList::PageList() :
  m_headPageI(RNIL),
  m_tailPageI(RNIL),
@@ -66,55 +73,29 @@ SuperPool::PageList::PageList(PtrI pageI) :
  m_tailPageI(pageI),
  m_pageCount(1)
 {
-}
-
-SuperPool::RecInfo::RecInfo(Uint32 recType, Uint32 recSize) :
-  m_recType(recType),
-  m_recSize(recSize),
-  m_maxUseCount(0),
-  m_currPageI(RNIL),
-  m_currFreeRecI(RNIL),
-  m_currUseCount(0),
-  m_totalUseCount(0),
-  m_totalRecCount(0),
-  m_freeList(),
-  m_activeList(),
-  m_fullList()
-{
-}
-
-SuperPool::PtrI
-SuperPool::getPageI(void* pageP)
-{
-  const Uint32 pageSize = m_pageSize;
-  const Uint32 pageBits = m_pageBits;
-  const Uint32 recBits = 32 - pageBits;
-  void* const memRoot = m_memRoot;
-  assert(pageP == SP_ALIGN_PTR(pageP, memRoot, pageSize));
-  my_ptrdiff_t ipL = ((Uint8*)pageP - (Uint8*)memRoot) / pageSize;
-  Int32 ip = (Int32)ipL;
-  Int32 lim = 1 << (pageBits - 1);
-  assert(ip == ipL && -lim <= ip && ip < lim && ip != -1);
-  PtrI pageI = ip << recBits;
-  assert(pageP == getPageP(pageI));
-  return pageI;
+  assert(pageI != RNIL);
 }

 void
 SuperPool::movePages(PageList& pl1, PageList& pl2)
 {
-  const Uint32 recBits = 32 - m_pageBits;
+  PtrI pageI1 = pl1.m_tailPageI;
+  PtrI pageI2 = pl2.m_headPageI;
  if (pl1.m_pageCount != 0) {
+    assert(pageI1 != RNIL);
    if (pl2.m_pageCount != 0) {
-      PtrI pageI1 = pl1.m_tailPageI;
-      PtrI pageI2 = pl2.m_headPageI;
+      assert(pageI2 != RNIL);
      PageEnt& pe1 = getPageEnt(pageI1);
      PageEnt& pe2 = getPageEnt(pageI2);
      pe1.m_nextPageI = pageI2;
      pe2.m_prevPageI = pageI1;
+      pl1.m_tailPageI = pl2.m_tailPageI;
      pl1.m_pageCount += pl2.m_pageCount;
+    } else {
+      assert(pageI2 == RNIL);
    }
  } else {
+    assert(pageI1 == RNIL);
    pl1 = pl2;
  }
  pl2.m_headPageI = pl2.m_tailPageI = RNIL;
@@ -124,6 +105,9 @@ SuperPool::movePages(PageList& pl1, PageList& pl2)
 void
 SuperPool::addHeadPage(PageList& pl, PtrI pageI)
 {
+  assert(pageI != RNIL);
+  PageEnt& pe = getPageEnt(pageI);
+  assert(pe.m_nextPageI == RNIL & pe.m_prevPageI == RNIL);
  PageList pl2(pageI);
  movePages(pl2, pl);
  pl = pl2;
@@ -132,6 +116,9 @@ SuperPool::addHeadPage(PageList& pl, PtrI pageI)
 void
 SuperPool::addTailPage(PageList& pl, PtrI pageI)
 {
+  assert(pageI != RNIL);
+  PageEnt& pe = getPageEnt(pageI);
+  assert(pe.m_nextPageI == RNIL & pe.m_prevPageI == RNIL);
  PageList pl2(pageI);
  movePages(pl, pl2);
 }
@@ -139,81 +126,187 @@ SuperPool::addTailPage(PageList& pl, PtrI pageI)
 void
 SuperPool::removePage(PageList& pl, PtrI pageI)
 {
+  assert(pageI != RNIL);
  PageEnt& pe = getPageEnt(pageI);
-  PtrI pageI1 = pe.m_prevPageI;
-  PtrI pageI2 = pe.m_nextPageI;
-  if (pageI1 != RNIL) {
-    PageEnt& pe1 = getPageEnt(pageI1);
-    pe1.m_nextPageI = pageI2;
-    if (pageI2 != RNIL) {
-      PageEnt& pe2 = getPageEnt(pageI2);
-      pe2.m_prevPageI = pageI1;
-    } else {
-      pl.m_tailPageI = pageI1;
-    }
+  if (pe.m_nextPageI != RNIL) {
+    assert(pl.m_tailPageI != pageI);
+    PageEnt& nextPe = getPageEnt(pe.m_nextPageI);
+    nextPe.m_prevPageI = pe.m_prevPageI;
  } else {
-    if (pageI2 != RNIL) {
-      PageEnt& pe2 = getPageEnt(pageI2);
-      pe2.m_prevPageI = pageI1;
-      pl.m_headPageI = pageI2;
-    } else {
-      pl.m_headPageI = pl.m_tailPageI = RNIL;
-    }
+    assert(pl.m_tailPageI == pageI);
+    pl.m_tailPageI = pe.m_prevPageI;
  }
-  pe.m_prevPageI = pe.m_nextPageI = RNIL;
+  if (pe.m_prevPageI != RNIL) {
+    assert(pl.m_headPageI != pageI);
+    PageEnt& prevPe = getPageEnt(pe.m_prevPageI);
+    prevPe.m_nextPageI = pe.m_nextPageI;
+  } else {
+    assert(pl.m_headPageI == pageI);
+    pl.m_headPageI = pe.m_nextPageI;
+  }
+  pe.m_nextPageI = RNIL;
+  pe.m_prevPageI = RNIL;
  assert(pl.m_pageCount != 0);
  pl.m_pageCount--;
 }

-void
-SuperPool::setCurrPage(RecInfo& ri, PtrI newPageI)
-{
-  PtrI oldPageI = ri.m_currPageI;
-  if (oldPageI != RNIL) {
-    // copy from cache
-    PageEnt& pe = getPageEnt(oldPageI);
-    pe.m_freeRecI = ri.m_currFreeRecI;
-    pe.m_useCount = ri.m_currUseCount;
-    // add to right list according to "pp2" policy
-    if (pe.m_useCount == 0) {
-      pe.m_pageType = 0;
-      addHeadPage(m_pageList, oldPageI);
-      ri.m_totalRecCount -= ri.m_maxUseCount;
-    } else if (pe.m_useCount < ri.m_maxUseCount) {
-      addHeadPage(ri.m_activeList, oldPageI);
-    } else {
-      addHeadPage(ri.m_fullList, oldPageI);
-    }
+// reverse mapping
+
+SuperPool::PtrI
+SuperPool::getPageI(void* pageP)
+{
+  Uint32 pageSize = m_pageSize;
+  Uint32 pageBits = m_pageBits;
+  Uint32 recBits = m_recBits;
+  void* memRoot = m_memRoot;
+  my_ptrdiff_t ipL = (Uint8*)pageP - (Uint8*)memRoot;
+  assert(ipL % pageSize == 0);
+  ipL /= (Int32)pageSize;
+  Int32 ip = (Int32)ipL;
+  Int32 lim = 1 << (pageBits - 1);
+  if (! (ip == ipL && -lim <= ip && ip < lim && ip != -1)) {
+    // page was too distant from memory root
+    return RNIL;
  }
-  if (newPageI != RNIL) {
-    PageEnt& pe = getPageEnt(newPageI);
-    // copy to cache
-    ri.m_currPageI = newPageI;
-    ri.m_currFreeRecI = pe.m_freeRecI;
-    ri.m_currUseCount = pe.m_useCount;
-    // remove from right list
-    if (pe.m_useCount == 0) {
-      removePage(ri.m_freeList, newPageI);
-    } else if (pe.m_useCount < ri.m_maxUseCount) {
-      removePage(ri.m_activeList, newPageI);
-    } else {
-      removePage(ri.m_fullList, newPageI);
-    }
-  } else {
-    ri.m_currPageI = RNIL;
-    ri.m_currFreeRecI = RNIL;
-    ri.m_currUseCount = 0;
+  PtrI pageI = ip << recBits;
+  assert(pageP == getPageP(pageI));
+  return pageI;
+}
+
+// record pool
+
+SuperPool::RecInfo::RecInfo(GroupPool& gp, Uint32 recSize) :
+  m_groupPool(gp),
+  m_recSize(recSize),
+  m_recType(0),
+  m_maxPerPage(0),
+  m_freeRecI(NNIL),
+  m_useCount(0),
+  m_pageList(),
+  m_hyX(1),
+  m_hyY(2)
+{
+  SuperPool& sp = gp.m_superPool;
+  m_recType = (sp.m_typeCount++ << 1) | 1;
+  assert(m_recSize == SP_ALIGN(m_recSize, sizeof(Uint32)));
+  { // compute max records per page
+    Uint32 n1 = sp.m_pageSize / m_recSize;
+    Uint32 b2 = (sp.m_recBits < 16 ? sp.m_recBits : 16);
+    Uint32 n2 = (1 << b2) - 1;  // last is reserved
+    m_maxPerPage = (n1 < n2 ? n1 : n2);
+    assert(m_maxPerPage != 0);
+  }
+}
+
+Uint32
+SuperPool::getFreeCount(RecInfo& ri, PtrI recI)
+{
+  Uint32 n = 0;
+  Uint32 recMask = m_recMask;
+  Uint32 loopRecI = recI;
+  while ((loopRecI & recMask) != recMask) {
+    n++;
+    void* loopRecP = getRecP(loopRecI, ri);
+    loopRecI = *(Uint32*)loopRecP;
+  }
+  assert(n == (Uint16)n);
+  return n;
+}
+
+Uint32
+SuperPool::getRecPageCount(RecInfo& ri)
+{
+  Uint32 n = 0;
+  for (Uint32 k = 0; k <= 2; k++)
+    n += ri.m_pageList[k].m_pageCount;
+  if (ri.m_freeRecI != NNIL)
+    n += 1;
+  return n;
+}
+
+Uint32
+SuperPool::getRecTotCount(RecInfo& ri)
+{
+  return ri.m_maxPerPage * getRecPageCount(ri);
+}
+
+Uint32
+SuperPool::getRecUseCount(RecInfo& ri)
+{
+  Uint32 n = ri.m_useCount;
+  // current page does not keep count
+  if (ri.m_freeRecI != NNIL) {
+    Uint32 maxPerPage = ri.m_maxPerPage;
+    Uint32 freeCount = getFreeCount(ri, ri.m_freeRecI);
+    assert(maxPerPage >= freeCount);
+    n += maxPerPage - freeCount;
+  }
+  return n;
+}
+
+// current page
+
+Uint32
+SuperPool::getRecPageList(RecInfo& ri, PageEnt& pe)
+{
+  if (pe.m_useCount == 0)
+    return 0;
+  if (pe.m_useCount < ri.m_maxPerPage)
+    return 1;
+  if (pe.m_useCount == ri.m_maxPerPage)
+    return 2;
+  assert(false);
+  return ~(Uint32)0;
+}
+
+void
+SuperPool::addCurrPage(RecInfo& ri, PtrI pageI)
+{
+  PageEnt& pe = getPageEnt(pageI);
+  ri.m_freeRecI = pe.m_freeRecI;
+  // remove from right list
+  Uint32 k = getRecPageList(ri, pe);
+  assert(k != 2);
+  removePage(ri.m_pageList[k], pageI);
+  assert(ri.m_useCount >= pe.m_useCount);
+  ri.m_useCount -= pe.m_useCount;
+}
+
+void
+SuperPool::removeCurrPage(RecInfo& ri)
+{
+  Uint32 recMask = m_recMask;
+  PtrI pageI = ri.m_freeRecI & ~ m_recMask;
+  // update page entry
+  PageEnt& pe = getPageEnt(pageI);
+  pe.m_freeRecI = ri.m_freeRecI;
+  Uint32 maxPerPage = ri.m_maxPerPage;
+  Uint32 freeCount = getFreeCount(ri, pe.m_freeRecI);
+  assert(maxPerPage >= freeCount);
+  pe.m_useCount = maxPerPage - freeCount;
+  // add to right list
+  Uint32 k = getRecPageList(ri, pe);
+  addHeadPage(ri.m_pageList[k], pageI);
+  ri.m_useCount += pe.m_useCount;
+  ri.m_freeRecI = NNIL;
+  if (k == 0) {
+    freeRecPages(ri);
  }
 }

+// page allocation
+
 bool
 SuperPool::getAvailPage(RecInfo& ri)
 {
  PtrI pageI;
-  if ((pageI = ri.m_activeList.m_headPageI) != RNIL ||
-      (pageI = ri.m_freeList.m_headPageI) != RNIL ||
+  if ((pageI = ri.m_pageList[1].m_headPageI) != RNIL ||
+      (pageI = ri.m_pageList[0].m_headPageI) != RNIL ||
      (pageI = getFreePage(ri)) != RNIL) {
-    setCurrPage(ri, pageI);
+    // the page is in record pool now
+    if (ri.m_freeRecI != NNIL)
+      removeCurrPage(ri);
+    addCurrPage(ri, pageI);
    return true;
  }
  return false;
@@ -222,121 +315,310 @@ SuperPool::getAvailPage(RecInfo& ri)
 SuperPool::PtrI
 SuperPool::getFreePage(RecInfo& ri)
 {
+  GroupPool& gp = ri.m_groupPool;
  PtrI pageI;
-  if (m_pageList.m_pageCount != 0) {
-    pageI = m_pageList.m_headPageI;
-    removePage(m_pageList, pageI);
-  } else {
-    pageI = getNewPage();
-    if (pageI == RNIL)
-      return RNIL;
+  if ((pageI = getFreePage(gp)) != RNIL) {
+    initFreePage(ri, pageI);
+    addHeadPage(ri.m_pageList[0], pageI);
+    return pageI;
+  }
+  return RNIL;
+}
+
+SuperPool::PtrI
+SuperPool::getFreePage(GroupPool& gp)
+{
+  PtrI pageI;
+  if ((pageI = gp.m_freeList.m_headPageI) != RNIL) {
+    removePage(gp.m_freeList, pageI);
+    return pageI;
+  }
+  if (gp.m_totPages < getMaxPages(gp) &&
+      (pageI = getFreePage()) != RNIL) {
+    gp.m_totPages++;
+    return pageI;
+  }
+  return RNIL;
+}
+
+SuperPool::PtrI
+SuperPool::getFreePage()
+{
+  PtrI pageI;
+  if ((pageI = m_freeList.m_headPageI) != RNIL) {
+    removePage(m_freeList, pageI);
+    return pageI;
  }
+  if ((pageI = getNewPage()) != RNIL) {
+    return pageI;
+  }
+  return RNIL;
+}
+
+void
+SuperPool::initFreePage(RecInfo& ri, PtrI pageI)
+{
  void* pageP = getPageP(pageI);
  // set up free record list
-  Uint32 maxUseCount = ri.m_maxUseCount;
+  Uint32 num = ri.m_maxPerPage;
  Uint32 recSize = ri.m_recSize;
  void* recP = (Uint8*)pageP;
  Uint32 irNext = 1;
-  while (irNext < maxUseCount) {
+  while (irNext < num) {
    *(Uint32*)recP = pageI | irNext;
    recP = (Uint8*)recP + recSize;
    irNext++;
  }
-  *(Uint32*)recP = RNIL;
-  // add to total record count
-  ri.m_totalRecCount += maxUseCount;
+  // terminator has all recBits set
+  *(Uint32*)recP = pageI | m_recMask;
  // set up new page entry
  PageEnt& pe = getPageEnt(pageI);
  new (&pe) PageEnt();
  pe.m_pageType = ri.m_recType;
  pe.m_freeRecI = pageI | 0;
  pe.m_useCount = 0;
-  // set type check bits
-  setCheckBits(pageI, ri.m_recType);
-  // add to record pool free list
-  addHeadPage(ri.m_freeList, pageI);
-  return pageI;
+  // set type check byte
+  Uint32 ip = pageI >> m_recBits;
+  m_pageType[ip] = (ri.m_recType & 0xFF);
 }

+// release
+
 void
-SuperPool::setSizes(size_t initSize, size_t incrSize, size_t maxSize)
+SuperPool::releaseNotCurrent(RecInfo& ri, PtrI recI)
 {
-  const Uint32 pageSize = m_pageSize;
-  m_initSize = SP_ALIGN_SIZE(initSize, pageSize);
-  m_incrSize = SP_ALIGN_SIZE(incrSize, pageSize);
-  m_maxSize = SP_ALIGN_SIZE(maxSize, pageSize);
+  PageEnt& pe = getPageEnt(recI);
+  void* recP = getRecP(recI, ri);
+  *(Uint32*)recP = pe.m_freeRecI;
+  pe.m_freeRecI = recI;
+  PtrI pageI = recI & ~ m_recMask;
+  Uint32 maxPerPage = ri.m_maxPerPage;
+  // move to right list
+  Uint32 k1 = getRecPageList(ri, pe);
+  assert(pe.m_useCount != 0);
+  pe.m_useCount--;
+  Uint32 k2 = getRecPageList(ri, pe);
+  if (k1 != k2) {
+    removePage(ri.m_pageList[k1], pageI);
+    addHeadPage(ri.m_pageList[k2], pageI);
+    if (k2 == 0) {
+      freeRecPages(ri);
+    }
+  }
+  assert(ri.m_useCount != 0);
+  ri.m_useCount--;
 }

+void
+SuperPool::freeRecPages(RecInfo& ri)
+{
+  // ignore current page
+  Uint32 useCount = ri.m_useCount;
+  Uint32 totCount = 0;
+  for (uint32 k = 0; k <= 2; k++)
+    totCount += ri.m_pageList[k].m_pageCount;
+  totCount *= ri.m_maxPerPage;
+  assert(totCount >= useCount);
+  if ((totCount - useCount) * ri.m_hyY < useCount * ri.m_hyX)
+    return;
+  // free all free pages
+  GroupPool& gp = ri.m_groupPool;
+  Uint32 minPages = getMinPages(gp);
+  PageList& pl = ri.m_pageList[0];
+  while (pl.m_pageCount != 0) {
+    PtrI pageI = pl.m_headPageI;
+    removePage(pl, pageI);
+    PageEnt& pe = getPageEnt(pageI);
+    pe.m_pageType = 0;
+    pe.m_freeRecI = NNIL;
+    Uint32 ip = pageI >> m_recBits;
+    m_pageType[ip] = 0;
+    if (gp.m_totPages <= minPages) {
+      addHeadPage(gp.m_freeList, pageI);
+    } else {
+      // return excess to super pool
+     addHeadPage(m_freeList, pageI);
+     assert(gp.m_totPages != 0);
+     gp.m_totPages--;
+    }
+  }
+}
+
+void
+SuperPool::freeAllRecPages(RecInfo& ri, bool force)
+{
+  GroupPool& gp = ri.m_groupPool;
+  if (ri.m_freeRecI != NNIL)
+    removeCurrPage(ri);
+  assert(force || ri.m_useCount == 0);
+  for (Uint32 k = 0; k <= 2; k++)
+    movePages(gp.m_freeList, ri.m_pageList[k]);
+}
+
+// size parameters
+
+void
+SuperPool::setInitPages(Uint32 initPages)
+{
+  m_initPages = initPages;
+}
+
+void
+SuperPool::setIncrPages(Uint32 incrPages)
+{
+  m_incrPages = incrPages;
+}
+
+void
+SuperPool::setMaxPages(Uint32 maxPages)
+{
+  m_maxPages = maxPages;
+}
+
+Uint32
+SuperPool::getGpMinPages()
+{
+  Uint32 minPages = (m_groupMinPct * m_totPages) / 100;
+  if (minPages < m_groupMinPages)
+    minPages = m_groupMinPages;
+  return minPages;
+}
+
+Uint32
+SuperPool::getMinPages(GroupPool& gp)
+{
+  Uint32 minPages = (gp.m_minPct * m_totPages) / 100;
+  if (minPages < gp.m_minPages)
+    minPages = gp.m_minPages;
+  return minPages;
+}
+
+Uint32
+SuperPool::getMaxPages(GroupPool& gp)
+{
+  Uint32 n1 = getGpMinPages();
+  Uint32 n2 = getMinPages(gp);
+  assert(n1 >= n2);
+  // pages reserved by other groups
+  Uint32 n3 = n1 - n2;
+  // rest can be claimed
+  Uint32 n4 = (m_totPages >= n3 ? m_totPages - n3 : 0);
+  return n4;
+}
+
+// debug
+
 void
 SuperPool::verify(RecInfo& ri)
 {
-  PageList* plList[3] = { &ri.m_freeList, &ri.m_activeList, &ri.m_fullList };
-  for (int i = 0; i < 3; i++) {
-    PageList& pl = *plList[i];
-    unsigned count = 0;
+  GroupPool& gp = ri.m_groupPool;
+  verifyPageList(m_freeList);
+  verifyPageList(gp.m_freeList);
+  for (Uint32 k = 0; k <= 2; k++) {
+    PageList& pl = ri.m_pageList[k];
+    verifyPageList(pl);
    PtrI pageI = pl.m_headPageI;
    while (pageI != RNIL) {
      PageEnt& pe = getPageEnt(pageI);
-      PtrI pageI1 = pe.m_prevPageI;
-      PtrI pageI2 = pe.m_nextPageI;
-      if (count == 0) {
-        assert(pageI1 == RNIL);
-      } else {
-        assert(pageI1 != RNIL);
-        PageEnt& pe1 = getPageEnt(pageI1);
-        assert(pe1.m_nextPageI == pageI);
-        if (pageI2 != RNIL) {
-          PageEnt& pe2 = getPageEnt(pageI2);
-          assert(pe2.m_prevPageI == pageI);
-        }
-      }
-      pageI = pageI2;
-      count++;
+      assert(pe.m_pageType == ri.m_recType);
+      Uint32 maxPerPage = ri.m_maxPerPage;
+      Uint32 freeCount = getFreeCount(ri, pe.m_freeRecI);
+      assert(maxPerPage >= freeCount);
+      Uint32 useCount = maxPerPage - freeCount;
+      assert(pe.m_useCount == useCount);
+      assert(k != 0 || useCount == 0);
+      assert(k != 1 || (useCount != 0 && freeCount != 0));
+      assert(k != 2 || freeCount == 0);
+      pageI = pe.m_nextPageI;
    }
-    assert(pl.m_pageCount == count);
  }
 }

+void
+SuperPool::verifyPageList(PageList& pl)
+{
+  Uint32 count = 0;
+  PtrI pageI = pl.m_headPageI;
+  while (pageI != RNIL) {
+    PageEnt& pe = getPageEnt(pageI);
+    if (pe.m_prevPageI == RNIL) {
+      assert(count == 0);
+    } else {
+      PageEnt& prevPe = getPageEnt(pe.m_prevPageI);
+      assert(prevPe.m_nextPageI == pageI);
+    }
+    if (pe.m_nextPageI == RNIL) {
+      assert(pl.m_tailPageI == pageI);
+    } else {
+      PageEnt& nextPe = getPageEnt(pe.m_nextPageI);
+      assert(nextPe.m_prevPageI == pageI);
+    }
+    if (pe.m_pageType != 0) {
+      assert(pe.m_freeRecI != NNIL);
+      PageEnt& pe2 = getPageEnt(pe.m_freeRecI);
+      assert(&pe == &pe2);
+    } else {
+      assert(pe.m_freeRecI == NNIL);
+    }
+    pageI = pe.m_nextPageI;
+    count++;
+  }
+  assert(pl.m_pageCount == count);
+}
+
+// GroupPool
+
+GroupPool::GroupPool(SuperPool& sp) :
+  m_superPool(sp),
+  m_minPct(0),
+  m_minPages(0),
+  m_totPages(0),
+  m_freeList()
+{
+}
+
+GroupPool::~GroupPool()
+{
+}
+
+void
+GroupPool::setMinPct(Uint32 minPct)
+{
+  SuperPool& sp = m_superPool;
+  // subtract any previous value
+  assert(sp.m_groupMinPct >= m_minPct);
+  sp.m_groupMinPct -= m_minPct;
+  // add new value
+  sp.m_groupMinPct += minPct;
+  m_minPct = minPct;
+}
+
+void
+GroupPool::setMinPages(Uint32 minPages)
+{
+  SuperPool& sp = m_superPool;
+  // subtract any previous value
+  assert(sp.m_groupMinPages >= m_minPages);
+  sp.m_groupMinPages -= m_minPages;
+  // add new value
+  sp.m_groupMinPages += minPages;
+  m_minPages = minPages;
+}
+
 // HeapPool

 HeapPool::HeapPool(Uint32 pageSize, Uint32 pageBits) :
  SuperPool(pageSize, pageBits),
  m_areaHead(),
  m_currArea(&m_areaHead),
-  m_lastArea(&m_areaHead),
-  m_mallocPart(4)
+  m_lastArea(&m_areaHead)
 {
 }

-bool
-HeapPool::init()
-{
-  const Uint32 pageBits = m_pageBits;
-  if (! SuperPool::init())
-    return false;;
-  // allocate page entry array
-  Uint32 peBytes = (1 << pageBits) * sizeof(PageEnt);
-  m_pageEnt = static_cast<PageEnt*>(malloc(peBytes));
-  if (m_pageEnt == 0)
-    return false;
-  memset(m_pageEnt, 0, peBytes);
-  // allocate type check array
-  Uint32 tcWords = 1 << (pageBits - (5 - SP_CHECK_LOG2));
-  m_typeCheck = static_cast<Uint32*>(malloc(tcWords << 2));
-  if (m_typeCheck == 0)
-    return false;
-  memset(m_typeCheck, 0, tcWords << 2);
-  // allocate initial data
-  assert(m_totalSize == 0);
-  if (! allocMoreData(m_initSize))
-    return false;
-  return true;
-}
-
 HeapPool::~HeapPool()
 {
  free(m_pageEnt);
-  free(m_typeCheck);
+  free(m_pageType);
  Area* ap;
  while ((ap = m_areaHead.m_nextArea) != 0) {
    m_areaHead.m_nextArea = ap->m_nextArea;
@@ -349,29 +631,28 @@ HeapPool::Area::Area() :
  m_nextArea(0),
  m_firstPageI(RNIL),
  m_currPage(0),
-  m_numPages(0),
-  m_memory(0)
+  m_memory(0),
+  m_pages(0),
+  m_numPages(0)
 {
 }

 SuperPool::PtrI
 HeapPool::getNewPage()
 {
-  const Uint32 pageSize = m_pageSize;
-  const Uint32 pageBits = m_pageBits;
-  const Uint32 recBits= 32 - pageBits;
  Area* ap = m_currArea;
  if (ap->m_currPage == ap->m_numPages) {
    // area is used up
    if (ap->m_nextArea == 0) {
-      // todo dynamic increase
-      assert(m_incrSize == 0);
-      return RNIL;
+      if (! allocMemory())
+        return RNIL;
    }
    ap = m_currArea = ap->m_nextArea;
+    assert(ap != 0);
  }
  assert(ap->m_currPage < ap->m_numPages);
  PtrI pageI = ap->m_firstPageI;
+  Uint32 recBits = m_recBits;
  Int32 ip = (Int32)pageI >> recBits;
  ip += ap->m_currPage;
  pageI = ip << recBits;
@@ -380,63 +661,90 @@ HeapPool::getNewPage()
 }

 bool
-HeapPool::allocMoreData(size_t size)
-{
-  const Uint32 pageSize = m_pageSize;
-  const Uint32 pageBits = m_pageBits;
-  const Uint32 recBits = 32 - pageBits;
-  const Uint32 incrSize = m_incrSize;
-  const Uint32 incrPages = incrSize / pageSize;
-  const Uint32 mallocPart = m_mallocPart;
-  size = SP_ALIGN_SIZE(size, pageSize);
-  if (incrSize != 0)
-    size = SP_ALIGN_SIZE(size, incrSize);
-  Uint32 needPages = size / pageSize;
-  while (needPages != 0) {
-    Uint32 wantPages = needPages;
-    if (incrPages != 0 && wantPages > incrPages)
-      wantPages = incrPages;
-    Uint32 tryPages = 0;
-    void* p1 = 0;
-    for (Uint32 i = mallocPart; i > 0 && p1 == 0; i--) {
-      // one page is usually wasted due to alignment to memory root
-      tryPages = ((wantPages + 1) * i) / mallocPart;
-      if (tryPages < 2)
-        break;
-      p1 = malloc(pageSize * tryPages);
-    }
-    if (p1 == 0)
+HeapPool::allocInit()
+{
+  Uint32 pageCount = (1 << m_pageBits);
+  if (m_pageEnt == 0) {
+    // allocate page entry array
+    Uint32 bytes = pageCount * sizeof(PageEnt);
+    m_pageEnt = static_cast<PageEnt*>(malloc(bytes));
+    if (m_pageEnt == 0)
      return false;
-    if (m_memRoot == 0) {
-      // set memory root at first "big" alloc
-      // assume malloc header makes later ip = -1 impossible
-      m_memRoot = p1;
-    }
-    void* p2 = SP_ALIGN_PTR(p1, m_memRoot, pageSize);
-    Uint32 numPages = tryPages - (p1 != p2);
-    my_ptrdiff_t ipL = ((Uint8*)p2 - (Uint8*)m_memRoot) / pageSize;
-    Int32 ip = (Int32)ipL;
-    Int32 lim = 1 << (pageBits - 1);
-    if (! (ip == ipL && -lim <= ip && ip + numPages < lim)) {
-      free(p1);
+    for (Uint32 i = 0; i < pageCount; i++)
+      new (&m_pageEnt[i]) PageEnt();
+  }
+  if (m_pageType == 0) {
+    // allocate type check array
+    Uint32 bytes = pageCount;
+    m_pageType = static_cast<Uint8*>(malloc(bytes));
+    if (m_pageType == 0)
      return false;
-    }
-    assert(ip != -1);
-    PtrI pageI = ip << recBits;
-    needPages = (needPages >= numPages ? needPages - numPages : 0);
-    m_totalSize += numPages * pageSize;
-    // allocate new area
+    memset(m_pageType, 0, bytes);
+  }
+  return true;
+}
+
+bool
+HeapPool::allocArea(Area* ap, Uint32 tryPages)
+{
+  Uint32 pageSize = m_pageSize;
+  // one page is usually lost due to alignment
+  Uint8* p1 = (Uint8*)malloc(pageSize * (tryPages + 1));
+  if (p1 == 0)
+    return false;
+  // align
+  UintPtr n1 = (UintPtr)p1;
+  UintPtr n2 = SP_ALIGN(n1, (UintPtr)pageSize);
+  Uint8* p2 = p1 + (n2 - n1);
+  assert(p2 >= p1 && p2 - p1 < pageSize && (UintPtr)p2 % pageSize == 0);
+  // set memory root to first allocated page
+  if (m_memRoot == 0)
+    m_memRoot = p2;
+  // convert to i-value
+  Uint32 pageI = getPageI(p2);
+  ap->m_firstPageI = pageI;
+  ap->m_currPage = 0;
+  ap->m_memory = p1;
+  ap->m_pages = p2;
+  ap->m_numPages = tryPages + (p1 == p2);
+  return true;
+}
+
+bool
+HeapPool::allocMemory()
+{
+  if (! allocInit())
+    return false;
+  // compute number of additional pages needed
+  if (m_maxPages <= m_totPages)
+    return false;
+  Uint32 needPages = (m_totPages == 0 ? m_initPages : m_incrPages);
+  if (needPages > m_maxPages - m_totPages)
+    needPages = m_maxPages - m_totPages;
+  while (needPages != 0) {
+    // add new area
    Area* ap = static_cast<Area*>(malloc(sizeof(Area)));
-    if (ap == 0) {
-      free(p1);
+    if (ap == 0)
      return false;
-    }
    new (ap) Area();
-    ap->m_firstPageI = pageI;
-    ap->m_numPages = numPages;
-    ap->m_memory = p1;
    m_lastArea->m_nextArea = ap;
    m_lastArea = ap;
+    // initial malloc is done in m_incrPages pieces
+    Uint32 wantPages = needPages;
+    if (m_incrPages != 0 && wantPages > m_incrPages)
+      wantPages = m_incrPages;
+    Uint32 tryPages = wantPages;
+    while (tryPages != 0) {
+      if (allocArea(ap, tryPages))
+        break;
+      tryPages /= 2;
+    }
+    if (tryPages == 0)
+      return false;
+    // update counts
+    Uint32 numPages = ap->m_numPages;
+    m_totPages += numPages;
+    needPages = (needPages > numPages ? needPages - numPages : 0);
  }
  return true;
 }
--- a/storage/ndb/src/kernel/vm/SuperPool.hpp
+++ b/storage/ndb/src/kernel/vm/SuperPool.hpp
@@ -22,20 +22,18 @@
 #include <pc.hpp>
 #include <ErrorReporter.hpp>

-#define NDB_SP_VERIFY_LEVEL 1
-
 /*
 * SuperPool - super pool for record pools (abstract class)
 *
- * Documents SuperPool and RecordPool<T>.
+ * Documents: SuperPool GroupPool RecordPool<T>
 *
- * GENERAL
+ * SUPER POOL
 *
 * A "super pool" is a shared pool of pages of fixed size.  A "record
 * pool" is a pool of records of fixed size.  One super pool instance is
- * used by any number of record pools to allocate their memory.
- * A special case is a "page pool" where a record is a simple page,
- * possibly smaller than super pool page.
+ * used by a number of record pools to allocate their memory.  A special
+ * case is a "page pool" where a record is a simple page whose size
+ * divides super pool page size.
 *
 * A record pool allocates memory in pages.  Thus each used page is
 * associated with one record pool and one record type.  The records on
@@ -49,27 +47,26 @@
 * record is stored as an "i-value" from which the record pointer "p" is
 * computed.  In super pool the i-value is a Uint32 with two parts:
 *
- * - "ip" index of page within super pool (high pageBits)
- * - "ir" index of record within page (low recBits)
+ * - "ip" index of page within super pool (high "pageBits")
+ * - "ir" index of record within page (low "recBits")
+ *
+ * At most 16 recBits are used, the rest are zero.
 *
 * The translation between "ip" and page address is described in next
 * section.  Once page address is known, the record address is found
 * from "ir" in the obvious way.
 *
- * The main advantage with i-value is that it can be verified.  The
- * level of verification depends on compile type (release, debug).
+ * One advantage of i-value is that it can be verified.  The level of
+ * verification can depend on compile options.
 *
- * - "v0" minimal sanity check
- * - "v1" check record type matches page type, see below
- * - "v2" check record is in use (not yet implemented)
+ * - "v1" check i-value specifies valid page
+ * - "v2" check record type matches page type, see below
+ * - "v3" check record is in use
+ * - "v4" check unused record is unmodified
 *
 * Another advantage of a 32-bit i-value is that it extends the space of
 * 32-bit addressable records on a 64-bit platform.
 *
- * RNIL is 0xffffff00 and indicates NULL i-value.  To avoid hitting RNIL
- * it is required that pageBits <= 30 and that the maximum value of the
- * range (2^pageBits-1) is not used.
- *
 * MEMORY ROOT
 *
 * This super pool requires a "memory root" i.e. a memory address such
@@ -77,13 +74,28 @@
 *
 *   page address = memory root + (signed)ip * page size
 *
- * This is possible on most platforms, provided that the memory root and
+ * This is possible on all platforms, provided that the memory root and
 * all pages are either on the heap or on the stack, in order to keep
 * the size of "ip" reasonably small.
 *
 * The cast (signed)ip is done as integer of pageBits bits.  "ip" has
 * same sign bit as i-value "i" so (signed)ip = (Int32)i >> recBits.
- * The RNIL restriction can be expressed as (signed)ip != -1.
+ *
+ * RESERVED I-VALUES
+ *
+ * RNIL is 0xffffff00 (signed -256).  It is used everywhere in NDB as
+ * "null pointer" i.e. as an i-value which does not point to a record.
+ * In addition the signed values -255 to -1 are reserved for use by the
+ * application.
+ *
+ * An i-value with all "ir" bits set is used as terminator in free
+ * record list.  Unlike RNIL, it still has valid page bits "ip".
+ *
+ * Following restrictions avoid hitting the reserved values:
+ *
+ * - pageBits is <= 30
+ * - the maximum "ip" value 2^pageBits-1 (signed -1) is not used
+ * - the maximum "ir" value 2^recBits-1 is not used
 *
 * PAGE ENTRIES
 *
@@ -95,37 +107,54 @@
 * - pointers (as i-values) to next and previous page in list
 *
 * Page entry cannot be stored on the page itself since this prevents
- * aligning pages to OS block size and the use of BATs (don't ask) for
- * page pools in NDB.  For now the implementation provides an array of
- * page entries with place for all (2^pageBits) entries.
+ * aligning pages to OS block size and the use of BATs for page pools in
+ * NDB.  For now the implementation provides an array of page entries
+ * with place for all potential (2^pageBits) entries.
 *
 * PAGE TYPE
 *
- * Page type is (in principle) unique to the record pool using the super
- * pool.  It is assigned in record pool constructor.  Page type zero
- * means that the page is free i.e. not allocated to a record pool.
+ * Page type is unique to the record pool using the super pool.  It is
+ * assigned in record pool constructor.  Page type zero means that the
+ * page is free i.e. not allocated to a record pool.
 *
- * Each "i-p" conversion checks ("v1") that the record belongs to same
+ * Each "i-p" conversion checks ("v2") that the record belongs to same
 * pool as the page.  This check is much more common than page or record
- * allocation.  To make it cache effective, there is a separate array of
- * reduced "type bits" (computed from real type).
+ * allocation.  To make it cache effective, there is a separate page
+ * type array.  It truncates type to one non-zero byte.
+ *
+ * GROUP POOL
+ *
+ * Each record pool belongs to a group.  The group specifies minimum
+ * size or memory percentage the group must be able to allocate.  The
+ * sum of the minimum sizes of group pools is normally smaller than
+ * super pool size.  This provides unclaimed memory which a group can
+ * use temporarily to allocate more than its minimum.
 *
- * FREE LISTS
+ * The record pools within a group compete freely for the available
+ * memory within the group.
 *
- * A record is either used or on the free list of the record pool.
- * A page has a use count i.e. number of used records.  When use count
- * drops to zero the page can be returned to the super pool.  This is
- * not necessarily done at once, or ever.
+ * Typical exmaple is group of all metadata pools.  The group allows
+ * specifying the memory to reserve for metadata, without having to
+ * specify number of tables, attributes, indexes, triggers, etc.
 *
- * To make freeing pages feasible, the record pool free list has two
- * levels.  There are available pages (some free) and a singly linked
- * free list within the page.  A page allocated to record pool is on one
- * of 4 lists:
+ * PAGE LISTS
 *
- * - free page list (all free, available)
- * - active page list (some free, some used, available)
- * - full page list (none free)
- * - current page (list of 1), see below
+ * Super pool has free page list.  Each group pool uses it to allocate
+ * its own free page list.  And each record pool within the group uses
+ * the group's free list to allocate its pages.
+ *
+ * A page allocated to a record pool has a use count i.e. number of used
+ * records.  When use count drops to zero the page can be returned to
+ * the group.  This is not necessarily done at once.
+ *
+ * The list of free records in a record pool has two levels.  There are
+ * available pages (some free) and a singly linked free list within the
+ * page.  A page allocated to record pool is on one of 4 lists:
+ *
+ * - free page (all free, available, could be returned to group)
+ * - busy page (some free, some used, available)
+ * - full page (none free)
+ * - current page (list of one), see below
 *
 * Some usage types (temporary pools) may never free records.  They pay
 * a small penalty for the extra overhead.
@@ -133,7 +162,7 @@
 * RECORD POOL
 *
 * A pool of records which allocates its memory from a super pool
- * instance specified in the constructor.  There are 3 basic operations:
+ * instance via a group pool.  There are 3 basic operations:
 *
 * - getPtr - translate i-value to pointer-to-record p
 * - seize - allocate record
@@ -141,76 +170,64 @@
 *
 * CURRENT PAGE
 *
- * getPtr is a fast computation which does not touch the page.  For
- * seize and release there is an optimization:
+ * getPtr is a fast computation which does not touch the page entry.
+ * For seize (and release) there is a small optimization.
 *
- * Define "current page" as page of latest seize or release.  Its page
- * entry is cached under record pool instance.  The page is removed from
- * its normal list.  Seize and release on current page are fast and
- * avoid touching the page.  The current page is used until
+ * The "current page" is the page of latest seize.  It is unlinked from
+ * its normal list and the free record pointer is stored under record
+ * pool instance.
 *
- * - seize and current page is full
- * - release and the page is not current page
+ * The page remains current until there is a seize and the page is full.
+ * Then the real page entry and its list membership are updated, and
+ * a new page is made current.
 *
- * Then the real page entry is updated and the page is added to the
- * appropriate list, and a new page is made current.
+ * This implies that each (active) record pool allocates at least one
+ * page which is never returned to the group.
 *
 * PAGE POLICY
 *
- * Allocating new page to record pool is expensive.  Therefore record
- * pool should not always return empty pages to super pool.  There are
- * two trivial policies, each with problems:
+ * A group pool returns its "excess" (above minimum) free pages to the
+ * super pool immediately.
+ *
+ * Allocating a new page to a record pool is expensive due to free list
+ * setup.  Therefore a record pool should not always return empty pages
+ * to the group.  Policies:
+ *
+ * - "pp1" never return empty page to the group
+ * - "pp2" always return empty (non-current) page to the group
+ * - "pp3" simple hysteresis
 *
- * - "pp1" never return empty page to super pool
- * - "pp2" always return empty page to super pool
+ * Last one "pp3" is used.  It works as follows:
 *
- * This implementation uses "pp2" for now.  A real policy is implemented
- * in next version.
+ * When a page becomes free, check if number of free records exceeds
+ * some fixed fraction of all records.  If it does, move all free pages
+ * to the group.  Current page is ignored in the check.
 *
- * OPEN ISSUES AND LIMITATIONS
+ * TODO
 *
- * - smarter (virtual) placement of check bits & page entries
- * - should getPtr etc be inlined?  (too much code)
- * - real page policy
- * - other implementations (only HeapPool is done)
- * - super pool list of all record pools, for statistics etc
- * - access by multiple threads is not supported
+ * Define abstract class SuperAlloc.  Make SuperPool a concrete class
+ * with SuperAlloc instance in ctor.  Replace HeapPool by HeapAlloc.
 */

-// align size
-#define SP_ALIGN_SIZE(sz, al) \
-  (((sz) + (al) - 1) & ~((al) - 1))
-
-// align pointer relative to base
-#define SP_ALIGN_PTR(p, base, al) \
-  (void*)((Uint8*)(base) + SP_ALIGN_SIZE((Uint8*)(p) - (Uint8*)(base), (al)))
+// Types forward.
+class GroupPool;

 class SuperPool {
 public:
-  // Type of i-value, used to reference both pages and records.  Page
-  // index "ip" occupies the high bits.  The i-value of a page is same
-  // as i-value of record 0 on the page.
+  // Type of i-value, used to reference both pages and records.
  typedef Uint32 PtrI;

-  // Size and address alignment given as number of bytes (power of 2).
-  STATIC_CONST( SP_ALIGN = 8 );
-
-  // Page entry.  Current|y allocated as array of (2^pageBits).
+  // Page entry.
  struct PageEnt {
    PageEnt();
-    Uint32 m_pageType;
-    Uint32 m_freeRecI;
-    Uint32 m_useCount;
+    Uint16 m_pageType;          // zero if not in record pool
+    Uint16 m_useCount;          // used records on the page
+    PtrI m_freeRecI;            // first free record on the page
    PtrI m_nextPageI;
    PtrI m_prevPageI;
  };

-  // Number of bits for cache effective type check given as log of 2.
-  // Example: 2 means 4 bits and uses 32k for 2g of 32k pages.
-  STATIC_CONST( SP_CHECK_LOG2 = 2 );
-
-  // Doubly-linked list of pages.  There is one free list in super pool
-  // and free, active, full list in each record pool.
+  // Doubly-linked list of page entries.
  struct PageList {
    PageList();
    PageList(PtrI pageI);
@@ -219,234 +236,209 @@ public:
    Uint32 m_pageCount;
  };

-  // Record pool information.  Each record pool instance contains one.
-  struct RecInfo {
-    RecInfo(Uint32 recType, Uint32 recSize);
-    const Uint32 m_recType;
-    const Uint32 m_recSize;
-    Uint32 m_maxUseCount;       // could be computed
-    Uint32 m_currPageI;         // current page
-    Uint32 m_currFreeRecI;
-    Uint32 m_currUseCount;
-    Uint32 m_totalUseCount;     // total per pool
-    Uint32 m_totalRecCount;
-    PageList m_freeList;
-    PageList m_activeList;
-    PageList m_fullList;
-  };
-
-  // Constructor.  Gives page size in bytes (excluding page header) and
+  // Constructor.  Gives page size in bytes (must be power of 2) and
  // number of bits to use for page index "ip" in i-value.
  SuperPool(Uint32 pageSize, Uint32 pageBits);

-  // Initialize.  Must be called after setting sizes or other parameters
-  // and before the pool is used.
-  virtual bool init();
-
  // Destructor.
  virtual ~SuperPool() = 0;

-  // Translate i-value to page entry.
+  // Move all pages from second list to end of first list.
+  void movePages(PageList& pl1, PageList& pl2);
+
+  // Add page to beginning of page list.
+  void addHeadPage(PageList& pl, PtrI pageI);
+
+  // Add page to end of page list.
+  void addTailPage(PageList& pl, PtrI pageI);
+
+  // Remove any page from page list.
+  void removePage(PageList& pl, PtrI pageI);
+
+  // Translate i-value ("ri" ignored) to page entry.
  PageEnt& getPageEnt(PtrI pageI);

-  // Translate i-value to page address.
+  // Translate i-value ("ri" ignored) to page address.
  void* getPageP(PtrI pageI);

-  // Translate page address to i-value (unused).
+  // Translate page address to i-value.  Address must be page-aligned to
+  // memory root.  Returns RNIL if "ip" range exceeded.
  PtrI getPageI(void* pageP);

-  // Given type, return non-zero reduced type check bits.
-  Uint32 makeCheckBits(Uint32 type);
-
-  // Get type check bits from type check array.
-  Uint32 getCheckBits(PtrI pageI);
-
-  // Set type check bits in type check array.
-  void setCheckBits(PtrI pageI, Uint32 type);
+  // Record pool info.
+  struct RecInfo {
+    RecInfo(GroupPool& gp, Uint32 recSize);
+    GroupPool& m_groupPool;
+    Uint32 m_recSize;
+    Uint16 m_recType;
+    Uint16 m_maxPerPage;
+    PtrI m_freeRecI;            // first free record on current page
+    Uint32 m_useCount;          // used records excluding current page
+    PageList m_pageList[3];     // 0-free 1-busy 2-full
+    Uint16 m_hyX;               // hysteresis fraction x/y in "pp3"
+    Uint16 m_hyY;
+  };

  // Translate i-value to record address.
  void* getRecP(PtrI recI, RecInfo& ri);

-  // Move all pages from second list to end of first list.
-  void movePages(PageList& pl1, PageList& pl2);
+  // Count records on page free list.
+  Uint32 getFreeCount(RecInfo& ri, PtrI freeRecPtrI);

-  // Add page to beginning of page list.
-  void addHeadPage(PageList& pl, PtrI pageI);
+  // Compute total number of pages in pool.
+  Uint32 getRecPageCount(RecInfo& ri);

-  // Add page to end of page list.
-  void addTailPage(PageList& pl, PtrI pageI);
+  // Compute total number of records (used or not) in pool.
+  Uint32 getRecTotCount(RecInfo& ri);

-  // Remove any page from page list.
-  void removePage(PageList& pl, PtrI pageI);
+  // Compute total number of used records in pool.
+  Uint32 getRecUseCount(RecInfo& ri);
+
+  // Compute record pool page list index (0,1,2).
+  Uint32 getRecPageList(RecInfo& ri, PageEnt& pe);
+
+  // Add current page.
+  void addCurrPage(RecInfo& ri, PtrI pageI);

-  // Set current page.  Previous current page is updated and added to
-  // appropriate list.
-  void setCurrPage(RecInfo& ri, PtrI pageI);
+  // Remove current page.
+  void removeCurrPage(RecInfo& ri);

  // Get page with some free records and make it current.  Takes head of
-  // active or free list, or else gets free page from super pool.
+  // used or free list, or else gets free page from group pool.
  bool getAvailPage(RecInfo& ri);

-  // Get free page from super pool and add it to record pool free list.
-  // This is an expensive subroutine of getAvailPage().
+  // Get free page from group pool and add it to record pool free list.
+  // This is an expensive subroutine of getAvailPage(RecInfo&):
  PtrI getFreePage(RecInfo& ri);

-  // Get new free page from the implementation.
+  // Get free detached (not on list) page from group pool.
+  PtrI getFreePage(GroupPool& gp);
+
+  // Get free detached page from super pool.
+  PtrI getFreePage();
+
+  // Get new free detached page from the implementation.
  virtual PtrI getNewPage() = 0;

-  // Set 3 size parameters, rounded to page size.  If called before
-  // init() then init() allocates the initial size.
-  void setSizes(size_t initSize = 0, size_t incrSize = 0, size_t maxSize = 0);
+  // Initialize free list etc.  Subroutine of getFreePage(RecInfo&).
+  void initFreePage(RecInfo& ri, PtrI pageI);

-  const Uint32 m_pageSize;
-  const Uint32 m_pageBits;
-  // implementation must set up these pointers
-  void* m_memRoot;
-  PageEnt* m_pageEnt;
-  Uint32* m_typeCheck;
-  Uint32 m_typeSeq;
-  PageList m_pageList;
-  size_t m_totalSize;
-  size_t m_initSize;
-  size_t m_incrSize;
-  size_t m_maxSize;
+  // Release record which is not on current page.
+  void releaseNotCurrent(RecInfo& ri, PtrI recI);
+
+  // Free pages from record pool according to page policy.
+  void freeRecPages(RecInfo& ri);
+
+  // Free all pages in record pool.
+  void freeAllRecPages(RecInfo& ri, bool force);
+
+  // Set pool size parameters in pages.  Call allocMemory() for changes
+  // (such as extra mallocs) to take effect.
+  void setInitPages(Uint32 initPages);
+  void setIncrPages(Uint32 incrPages);
+  void setMaxPages(Uint32 maxPages);
+
+  // Get number of pages reserved by all groups.
+  Uint32 getGpMinPages();
+
+  // Get number of pages reserved to a group.
+  Uint32 getMinPages(GroupPool& gp);
+
+  // Get max number of pages a group can try to allocate.
+  Uint32 getMaxPages(GroupPool& gp);
+
+  // Allocate more memory according to current parameters.  Returns
+  // false if no new memory was allocated.   Otherwise returns true,
+  // even if the amount allocated was less than requested.
+  virtual bool allocMemory() = 0;

  // Debugging.
  void verify(RecInfo& ri);
+  void verifyPageList(PageList& pl);
+
+  // Super pool parameters.
+  const Uint32 m_pageSize;
+  const Uint16 m_pageBits;
+  const Uint16 m_recBits;
+  const Uint32 m_recMask;
+  // Implementation must set up these 3 pointers.
+  void* m_memRoot;
+  PageEnt* m_pageEnt;
+  Uint8* m_pageType;
+  // Free page list.
+  PageList m_freeList;
+  // Free pages and sizes.
+  Uint32 m_initPages;
+  Uint32 m_incrPages;
+  Uint32 m_maxPages;
+  Uint32 m_totPages;
+  Uint32 m_typeCount;
+  // Reserved and allocated by group pools.
+  Uint32 m_groupMinPct;
+  Uint32 m_groupMinPages;
+  Uint32 m_groupTotPages;
 };

 inline SuperPool::PageEnt&
 SuperPool::getPageEnt(PtrI pageI)
 {
-  Uint32 ip = pageI >> (32 - m_pageBits);
+  Uint32 ip = pageI >> m_recBits;
  return m_pageEnt[ip];
 }

 inline void*
 SuperPool::getPageP(PtrI ptrI)
 {
-  Int32 ip = (Int32)ptrI >> (32 - m_pageBits);
-  my_ptrdiff_t sz = m_pageSize;
-  void* pageP = (Uint8*)m_memRoot + ip * sz;
-  return pageP;
-}
-
-inline Uint32
-SuperPool::makeCheckBits(Uint32 type)
-{
-  Uint32 shift = 1 << SP_CHECK_LOG2;
-  Uint32 mask = (1 << shift) - 1;
-  return 1 + type % mask;
-}
-
-inline Uint32
-SuperPool::getCheckBits(PtrI pageI)
-{
-  Uint32 ip = pageI >> (32 - m_pageBits);
-  Uint32 xp = ip >> (5 - SP_CHECK_LOG2);
-  Uint32 yp = ip & (1 << (5 - SP_CHECK_LOG2)) - 1;
-  Uint32& w = m_typeCheck[xp];
-  Uint32 shift = 1 << SP_CHECK_LOG2;
-  Uint32 mask = (1 << shift) - 1;
-  // get
-  Uint32 bits = (w >> yp * shift) & mask;
-  return bits;
-}
-
-inline void
-SuperPool::setCheckBits(PtrI pageI, Uint32 type)
-{
-  Uint32 ip = pageI >> (32 - m_pageBits);
-  Uint32 xp = ip >> (5 - SP_CHECK_LOG2);
-  Uint32 yp = ip & (1 << (5 - SP_CHECK_LOG2)) - 1;
-  Uint32& w = m_typeCheck[xp];
-  Uint32 shift = 1 << SP_CHECK_LOG2;
-  Uint32 mask = (1 << shift) - 1;
-  // set
-  Uint32 bits = makeCheckBits(type);
-  w &= ~(mask << yp * shift);
-  w |= (bits << yp * shift);
+  Int32 ip = (Int32)ptrI >> m_recBits;
+  return (Uint8*)m_memRoot + ip * (my_ptrdiff_t)m_pageSize;
 }

 inline void*
 SuperPool::getRecP(PtrI ptrI, RecInfo& ri)
 {
-  const Uint32 recMask = (1 << (32 - m_pageBits)) - 1;
-  PtrI pageI = ptrI & ~recMask;
-#if NDB_SP_VERIFY_LEVEL >= 1
-  Uint32 bits1 = getCheckBits(pageI);
-  Uint32 bits2 = makeCheckBits(ri.m_recType);
-  assert(bits1 == bits2);
-#endif
-  void* pageP = getPageP(pageI);
-  Uint32 ir = ptrI & recMask;
-  void* recP = (Uint8*)pageP + ir * ri.m_recSize;
-  return recP;
+  Uint32 ip = ptrI >> m_recBits;
+  assert(m_pageType[ip] == (ri.m_recType & 0xFF));
+  Uint32 ir = ptrI & m_recMask;
+  return (Uint8*)getPageP(ptrI) + ir * ri.m_recSize;
 }

 /*
- * HeapPool - SuperPool on heap (concrete class)
- *
- * A super pool based on malloc with memory root on the heap.  This
- * pool type has 2 realistic uses:
- *
- * - a small pool with only initial malloc and pageBits set to match
- * - the big pool from which all heap allocations are done
- *
- * A "smart" malloc may break "ip" limit by using different VM areas for
- * different sized requests.  For this reason malloc is done in units of
- * increment size if possible.   Memory root is set to start of first
- * malloc.
+ * GroupPool - subset of a super pool pages (concrete class)
 */

-class HeapPool : public SuperPool {
+class GroupPool {
 public:
-  // Describes malloc area.  The areas are kept in singly linked list.
-  // There is a list head and pointers to current and last area.
-  struct Area {
-    Area();
-    Area* m_nextArea;
-    PtrI m_firstPageI;
-    Uint32 m_currPage;
-    Uint32 m_numPages;
-    void* m_memory;
-  };
+  // Types.
+  typedef SuperPool::PageList PageList;

  // Constructor.
-  HeapPool(Uint32 pageSize, Uint32 pageBits);
-
-  // Initialize.
-  virtual bool init();
+  GroupPool(SuperPool& sp);

  // Destructor.
-  virtual ~HeapPool();
+  ~GroupPool();

-  // Use malloc to allocate more.
-  bool allocMoreData(size_t size);
+  // Set minimum pct reserved in super pool.
+  void setMinPct(Uint32 resPct);

-  // Get new page from current area.
-  virtual PtrI getNewPage();
-
-  // List of malloc areas.
-  Area m_areaHead;
-  Area* m_currArea;
-  Area* m_lastArea;
+  // Set minimum pages reserved in super pool.
+  void setMinPages(Uint32 resPages);

-  // Fraction of malloc size to try if cannot get all in one.
-  Uint32 m_mallocPart;
+  SuperPool& m_superPool;
+  Uint32 m_minPct;
+  Uint32 m_minPages;
+  Uint32 m_totPages;
+  PageList m_freeList;
 };

 /*
 * RecordPool -  record pool using one super pool instance (template)
- *
- * Documented under SuperPool.  Satisfies ArrayPool interface.
 */

 template <class T>
 class RecordPool {
 public:
  // Constructor.
-  RecordPool(SuperPool& superPool);
+  RecordPool(GroupPool& gp);

  // Destructor.
  ~RecordPool();
@@ -462,9 +454,9 @@ public:

  // todo variants of basic methods

-  // Return all pages to super pool.  The force flag is required if
+  // Return all pages to group pool.  The force flag is required if
  // there are any used records.
-  void free(bool force);
+  void freeAllRecPages(bool force);

  SuperPool& m_superPool;
  SuperPool::RecInfo m_recInfo;
@@ -472,24 +464,17 @@ public:

 template <class T>
 inline
-RecordPool<T>::RecordPool(SuperPool& superPool) :
-  m_superPool(superPool),
-  m_recInfo(1 + superPool.m_typeSeq++, sizeof(T))
+RecordPool<T>::RecordPool(GroupPool& gp) :
+  m_superPool(gp.m_superPool),
+  m_recInfo(gp, sizeof(T))
 {
-  SuperPool::RecInfo& ri = m_recInfo;
-  assert(sizeof(T) == SP_ALIGN_SIZE(sizeof(T), sizeof(Uint32)));
-  Uint32 maxUseCount = superPool.m_pageSize / sizeof(T);
-  Uint32 sizeLimit = 1 << (32 - superPool.m_pageBits);
-  if (maxUseCount >= sizeLimit)
-    maxUseCount = sizeLimit;
-  ri.m_maxUseCount = maxUseCount;
 }

 template <class T>
 inline
 RecordPool<T>::~RecordPool()
 {
-  free(true);
+  freeAllRecPages(true);
 }

 template <class T>
@@ -506,18 +491,19 @@ RecordPool<T>::seize(Ptr<T>& ptr)
 {
  SuperPool& sp = m_superPool;
  SuperPool::RecInfo& ri = m_recInfo;
-  if (ri.m_currFreeRecI != RNIL || sp.getAvailPage(ri)) {
-    SuperPool::PtrI recI = ri.m_currFreeRecI;
+  Uint32 recMask = sp.m_recMask;
+  // get current page
+  if ((ri.m_freeRecI & recMask) != recMask ||
+      sp.getAvailPage(ri)) {
+    SuperPool::PtrI recI = ri.m_freeRecI;
    void* recP = sp.getRecP(recI, ri);
-    ri.m_currFreeRecI = *(Uint32*)recP;
-    Uint32 useCount = ri.m_currUseCount;
-    assert(useCount < ri.m_maxUseCount);
-    ri.m_currUseCount = useCount + 1;
-    ri.m_totalUseCount++;
+    ri.m_freeRecI = *(Uint32*)recP;
    ptr.i = recI;
    ptr.p = static_cast<T*>(recP);
    return true;
  }
+  ptr.i = RNIL;
+  ptr.p = 0;
  return false;
 }

@@ -527,35 +513,79 @@ RecordPool<T>::release(Ptr<T>& ptr)
 {
  SuperPool& sp = m_superPool;
  SuperPool::RecInfo& ri = m_recInfo;
-  const Uint32 recMask = (1 << (32 - sp.m_pageBits)) - 1;
  SuperPool::PtrI recI = ptr.i;
-  SuperPool::PtrI pageI = recI & ~recMask;
-  if (pageI != ri.m_currPageI) {
-    sp.setCurrPage(ri, pageI);
+  Uint32 recMask = sp.m_recMask;
+  // check if current page
+  if ((recI & ~recMask) == (ri.m_freeRecI & ~recMask)) {
+    void* recP = sp.getRecP(recI, ri);
+    *(Uint32*)recP = ri.m_freeRecI;
+    ri.m_freeRecI = recI;
+  } else {
+    sp.releaseNotCurrent(ri, recI);
  }
-  void* recP = sp.getRecP(recI, ri);
-  *(Uint32*)recP = ri.m_currFreeRecI;
-  ri.m_currFreeRecI = recI;
-  Uint32 useCount = ri.m_currUseCount;
-  assert(useCount != 0);
-  ri.m_currUseCount = useCount - 1;
-  ri.m_totalUseCount--;
  ptr.i = RNIL;
  ptr.p = 0;
 }

 template <class T>
 inline void
-RecordPool<T>::free(bool force)
+RecordPool<T>::freeAllRecPages(bool force)
 {
  SuperPool& sp = m_superPool;
-  SuperPool::RecInfo& ri = m_recInfo;
-  sp.setCurrPage(ri, RNIL);
-  assert(force || ri.m_totalUseCount == 0);
-  sp.movePages(sp.m_pageList, ri.m_freeList);
-  sp.movePages(sp.m_pageList, ri.m_activeList);
-  sp.movePages(sp.m_pageList, ri.m_fullList);
-  ri.m_totalRecCount = 0;
+  sp.freeAllRecPages(m_recInfo, force);
 }

+/*
+ * HeapPool - SuperPool on heap (concrete class)
+ *
+ * A super pool based on malloc with memory root on the heap.  This
+ * pool type has 2 realistic uses:
+ *
+ * - a small pool with only initial malloc and pageBits set to match
+ * - the big pool from which all heap allocations are done
+ *
+ * A smart malloc may break "ip" limit by using different VM areas for
+ * different sized requests.  For this reason malloc is done in units of
+ * increment size if possible.  Memory root is set to the page-aligned
+ * address from first page malloc.
+ */
+
+class HeapPool : public SuperPool {
+public:
+  // Describes malloc area.  The areas are kept in singly linked list.
+  // There is a list head and pointers to current and last area.
+  struct Area {
+    Area();
+    Area* m_nextArea;
+    PtrI m_firstPageI;
+    Uint32 m_currPage;
+    void* m_memory;     // from malloc
+    void* m_pages;      // page-aligned pages
+    Uint32 m_numPages;  // number of pages
+  };
+
+  // Constructor.
+  HeapPool(Uint32 pageSize, Uint32 pageBits);
+
+  // Destructor.
+  virtual ~HeapPool();
+
+  // Get new page from current area.
+  virtual PtrI getNewPage();
+
+  // Allocate fixed arrays.
+  bool allocInit();
+
+  // Allocate array of aligned pages.
+  bool allocArea(Area* ap, Uint32 tryPages);
+
+  // Allocate memory.
+  virtual bool allocMemory();
+
+  // List of malloc areas.
+  Area m_areaHead;
+  Area* m_currArea;
+  Area* m_lastArea;
+};
+
 #endif
--- a/storage/ndb/src/kernel/vm/testSuperPool.cpp
+++ b/storage/ndb/src/kernel/vm/testSuperPool.cpp
@@ -81,19 +81,22 @@ cmpPtrP(const void* a, const void* b)

 static Uint32 loopcount = 3;

-template <Uint32 sz>
-void
-sp_test(SuperPool& sp)
+template <class T>
+static void
+sp_test(GroupPool& gp)
 {
-  typedef A<sz> T;
-  RecordPool<T> rp(sp);
+  SuperPool& sp = gp.m_superPool;
+  RecordPool<T> rp(gp);
+  assert(gp.m_totPages == gp.m_freeList.m_pageCount);
  SuperPool::RecInfo& ri = rp.m_recInfo;
-  Uint32 pageCount = sp.m_totalSize / sp.m_pageSize;
-  Uint32 perPage = rp.m_recInfo.m_maxUseCount;
+  Uint32 pageCount = sp.m_totPages;
+  Uint32 perPage = rp.m_recInfo.m_maxPerPage;
  Uint32 perPool = perPage * pageCount;
  ndbout << "pages=" << pageCount << " perpage=" << perPage << " perpool=" << perPool << endl;
  Ptr<T>* ptrList = new Ptr<T> [perPool];
  memset(ptrList, 0x1f, perPool * sizeof(Ptr<T>));
+  Uint32 verify = 1000;
+  Uint32 useCount;
  Uint32 loop;
  for (loop = 0; loop < loopcount; loop++) {
    ndbout << "loop " << loop << endl;
@@ -101,25 +104,26 @@ sp_test(SuperPool& sp)
    // seize all
    ndbout << "seize all" << endl;
    for (i = 0; i < perPool + 1; i++) {
+      if (verify == 0 || urandom(perPool) < verify)
+        sp.verify(ri);
      j = i;
-      sp.verify(ri);
      Ptr<T> ptr1 = { 0, RNIL };
      if (! rp.seize(ptr1))
        break;
-      // write value
      ptr1.p->fill();
      ptr1.p->check();
-      // verify getPtr
      Ptr<T> ptr2 = { 0, ptr1.i };
      rp.getPtr(ptr2);
      assert(ptr1.i == ptr2.i && ptr1.p == ptr2.p);
-      // save
      ptrList[j] = ptr1;
    }
-    assert(i == perPool);
-    assert(ri.m_totalUseCount == perPool && ri.m_totalRecCount == perPool);
    sp.verify(ri);
+    ndbout << "seized " << i << endl;
+    assert(i == perPool);
+    useCount = sp.getRecUseCount(ri);
+    assert(useCount == perPool);
    // check duplicates
+    ndbout << "check dups" << endl;
    {
      Ptr<T>* ptrList2 = new Ptr<T> [perPool];
      memcpy(ptrList2, ptrList, perPool * sizeof(Ptr<T>));
@@ -135,7 +139,8 @@ sp_test(SuperPool& sp)
    ndbout << "release all" << endl;
    Uint32 coprime = random_coprime(perPool);
    for (i = 0; i < perPool; i++) {
-      sp.verify(ri);
+      if (verify == 0 || urandom(perPool) < verify)
+        sp.verify(ri);
      switch (loop % 3) {
      case 0:   // ascending
        j = i;
@@ -153,27 +158,31 @@ sp_test(SuperPool& sp)
      rp.release(ptr);
      assert(ptr.i == RNIL && ptr.p == 0);
    }
-    sp.setCurrPage(ri, RNIL);
-    assert(ri.m_totalUseCount == 0 && ri.m_totalRecCount == 0);
    sp.verify(ri);
+    useCount = sp.getRecUseCount(ri);
+    assert(useCount == 0);
    // seize/release at random
    ndbout << "seize/release at random" << endl;
    for (i = 0; i < loopcount * perPool; i++) {
+      if (verify == 0 || urandom(perPool) < verify)
+        sp.verify(ri);
      j = urandom(perPool);
      Ptr<T>& ptr = ptrList[j];
      if (ptr.i == RNIL) {
-        rp.seize(ptr);
-        ptr.p->fill();
+        if (rp.seize(ptr))
+          ptr.p->fill();
      } else {
        ptr.p->check();
        rp.release(ptr);
      }
    }
-    ndbout << "used " << ri.m_totalUseCount << endl;
+    ndbout << "used " << ri.m_useCount << endl;
    sp.verify(ri);
    // release all
    ndbout << "release all" << endl;
    for (i = 0; i < perPool; i++) {
+      if (verify == 0 || urandom(perPool) < verify)
+        sp.verify(ri);
      j = i;
      Ptr<T>& ptr = ptrList[j];
      if (ptr.i != RNIL) {
@@ -181,40 +190,54 @@ sp_test(SuperPool& sp)
        rp.release(ptr);
      }
    }
-    sp.setCurrPage(ri, RNIL);
-    assert(ri.m_totalUseCount == 0 && ri.m_totalRecCount == 0);
    sp.verify(ri);
+    useCount = sp.getRecUseCount(ri);
+    assert(useCount == 0);
  }
  // done
  delete [] ptrList;
 }

-static Uint32 pageCount = 99;
 static Uint32 pageSize = 32768;
-static Uint32 pageBits = 15;
-
-const Uint32 sz1 = 3, sz2 = 4, sz3 = 53, sz4 = 424, sz5 = 5353;
-
-template void sp_test<sz1>(SuperPool& sp);
-template void sp_test<sz2>(SuperPool& sp);
-template void sp_test<sz3>(SuperPool& sp);
-template void sp_test<sz4>(SuperPool& sp);
-template void sp_test<sz5>(SuperPool& sp);
+static Uint32 pageBits = 17;
+
+const Uint32 sz1 = 3;
+const Uint32 sz2 = 4;
+const Uint32 sz3 = 53;
+const Uint32 sz4 = 424;
+const Uint32 sz5 = 5353;
+
+typedef A<sz1> T1;
+typedef A<sz2> T2;
+typedef A<sz3> T3;
+typedef A<sz4> T4;
+typedef A<sz5> T5;
+
+template static void sp_test<T1>(GroupPool& sp);
+template static void sp_test<T2>(GroupPool& sp);
+template static void sp_test<T3>(GroupPool& sp);
+template static void sp_test<T4>(GroupPool& sp);
+template static void sp_test<T5>(GroupPool& sp);

 int
 main()
 {
  HeapPool sp(pageSize, pageBits);
-  sp.setSizes(pageCount * pageSize);
-  if (! sp.init())
+  sp.setInitPages(7);
+  sp.setMaxPages(7);
+  if (! sp.allocMemory())
    assert(false);
+  GroupPool gp(sp);
  Uint16 s = (Uint16)getpid();
  srandom(s);
  ndbout << "rand " << s << endl;
-  sp_test<sz1>(sp);
-  sp_test<sz2>(sp);
-  sp_test<sz3>(sp);
-  sp_test<sz4>(sp);
-  sp_test<sz5>(sp);
+  int count = 0;
+  while (++count <= 1) {
+    sp_test<T1>(gp);
+    sp_test<T2>(gp);
+    sp_test<T3>(gp);
+    sp_test<T4>(gp);
+    sp_test<T5>(gp);
+  }
  return 0;
 }